1*1c12ee1eSDan Willemsen// Copyright 2018 The Go Authors. All rights reserved. 2*1c12ee1eSDan Willemsen// Use of this source code is governed by a BSD-style 3*1c12ee1eSDan Willemsen// license that can be found in the LICENSE file. 4*1c12ee1eSDan Willemsen 5*1c12ee1eSDan Willemsenpackage text 6*1c12ee1eSDan Willemsen 7*1c12ee1eSDan Willemsenimport ( 8*1c12ee1eSDan Willemsen "math" 9*1c12ee1eSDan Willemsen "math/bits" 10*1c12ee1eSDan Willemsen "strconv" 11*1c12ee1eSDan Willemsen "strings" 12*1c12ee1eSDan Willemsen "unicode/utf8" 13*1c12ee1eSDan Willemsen 14*1c12ee1eSDan Willemsen "google.golang.org/protobuf/internal/detrand" 15*1c12ee1eSDan Willemsen "google.golang.org/protobuf/internal/errors" 16*1c12ee1eSDan Willemsen) 17*1c12ee1eSDan Willemsen 18*1c12ee1eSDan Willemsen// encType represents an encoding type. 19*1c12ee1eSDan Willemsentype encType uint8 20*1c12ee1eSDan Willemsen 21*1c12ee1eSDan Willemsenconst ( 22*1c12ee1eSDan Willemsen _ encType = (1 << iota) / 2 23*1c12ee1eSDan Willemsen name 24*1c12ee1eSDan Willemsen scalar 25*1c12ee1eSDan Willemsen messageOpen 26*1c12ee1eSDan Willemsen messageClose 27*1c12ee1eSDan Willemsen) 28*1c12ee1eSDan Willemsen 29*1c12ee1eSDan Willemsen// Encoder provides methods to write out textproto constructs and values. The user is 30*1c12ee1eSDan Willemsen// responsible for producing valid sequences of constructs and values. 31*1c12ee1eSDan Willemsentype Encoder struct { 32*1c12ee1eSDan Willemsen encoderState 33*1c12ee1eSDan Willemsen 34*1c12ee1eSDan Willemsen indent string 35*1c12ee1eSDan Willemsen delims [2]byte 36*1c12ee1eSDan Willemsen outputASCII bool 37*1c12ee1eSDan Willemsen} 38*1c12ee1eSDan Willemsen 39*1c12ee1eSDan Willemsentype encoderState struct { 40*1c12ee1eSDan Willemsen lastType encType 41*1c12ee1eSDan Willemsen indents []byte 42*1c12ee1eSDan Willemsen out []byte 43*1c12ee1eSDan Willemsen} 44*1c12ee1eSDan Willemsen 45*1c12ee1eSDan Willemsen// NewEncoder returns an Encoder. 46*1c12ee1eSDan Willemsen// 47*1c12ee1eSDan Willemsen// If indent is a non-empty string, it causes every entry in a List or Message 48*1c12ee1eSDan Willemsen// to be preceded by the indent and trailed by a newline. 49*1c12ee1eSDan Willemsen// 50*1c12ee1eSDan Willemsen// If delims is not the zero value, it controls the delimiter characters used 51*1c12ee1eSDan Willemsen// for messages (e.g., "{}" vs "<>"). 52*1c12ee1eSDan Willemsen// 53*1c12ee1eSDan Willemsen// If outputASCII is true, strings will be serialized in such a way that 54*1c12ee1eSDan Willemsen// multi-byte UTF-8 sequences are escaped. This property ensures that the 55*1c12ee1eSDan Willemsen// overall output is ASCII (as opposed to UTF-8). 56*1c12ee1eSDan Willemsenfunc NewEncoder(indent string, delims [2]byte, outputASCII bool) (*Encoder, error) { 57*1c12ee1eSDan Willemsen e := &Encoder{} 58*1c12ee1eSDan Willemsen if len(indent) > 0 { 59*1c12ee1eSDan Willemsen if strings.Trim(indent, " \t") != "" { 60*1c12ee1eSDan Willemsen return nil, errors.New("indent may only be composed of space and tab characters") 61*1c12ee1eSDan Willemsen } 62*1c12ee1eSDan Willemsen e.indent = indent 63*1c12ee1eSDan Willemsen } 64*1c12ee1eSDan Willemsen switch delims { 65*1c12ee1eSDan Willemsen case [2]byte{0, 0}: 66*1c12ee1eSDan Willemsen e.delims = [2]byte{'{', '}'} 67*1c12ee1eSDan Willemsen case [2]byte{'{', '}'}, [2]byte{'<', '>'}: 68*1c12ee1eSDan Willemsen e.delims = delims 69*1c12ee1eSDan Willemsen default: 70*1c12ee1eSDan Willemsen return nil, errors.New("delimiters may only be \"{}\" or \"<>\"") 71*1c12ee1eSDan Willemsen } 72*1c12ee1eSDan Willemsen e.outputASCII = outputASCII 73*1c12ee1eSDan Willemsen 74*1c12ee1eSDan Willemsen return e, nil 75*1c12ee1eSDan Willemsen} 76*1c12ee1eSDan Willemsen 77*1c12ee1eSDan Willemsen// Bytes returns the content of the written bytes. 78*1c12ee1eSDan Willemsenfunc (e *Encoder) Bytes() []byte { 79*1c12ee1eSDan Willemsen return e.out 80*1c12ee1eSDan Willemsen} 81*1c12ee1eSDan Willemsen 82*1c12ee1eSDan Willemsen// StartMessage writes out the '{' or '<' symbol. 83*1c12ee1eSDan Willemsenfunc (e *Encoder) StartMessage() { 84*1c12ee1eSDan Willemsen e.prepareNext(messageOpen) 85*1c12ee1eSDan Willemsen e.out = append(e.out, e.delims[0]) 86*1c12ee1eSDan Willemsen} 87*1c12ee1eSDan Willemsen 88*1c12ee1eSDan Willemsen// EndMessage writes out the '}' or '>' symbol. 89*1c12ee1eSDan Willemsenfunc (e *Encoder) EndMessage() { 90*1c12ee1eSDan Willemsen e.prepareNext(messageClose) 91*1c12ee1eSDan Willemsen e.out = append(e.out, e.delims[1]) 92*1c12ee1eSDan Willemsen} 93*1c12ee1eSDan Willemsen 94*1c12ee1eSDan Willemsen// WriteName writes out the field name and the separator ':'. 95*1c12ee1eSDan Willemsenfunc (e *Encoder) WriteName(s string) { 96*1c12ee1eSDan Willemsen e.prepareNext(name) 97*1c12ee1eSDan Willemsen e.out = append(e.out, s...) 98*1c12ee1eSDan Willemsen e.out = append(e.out, ':') 99*1c12ee1eSDan Willemsen} 100*1c12ee1eSDan Willemsen 101*1c12ee1eSDan Willemsen// WriteBool writes out the given boolean value. 102*1c12ee1eSDan Willemsenfunc (e *Encoder) WriteBool(b bool) { 103*1c12ee1eSDan Willemsen if b { 104*1c12ee1eSDan Willemsen e.WriteLiteral("true") 105*1c12ee1eSDan Willemsen } else { 106*1c12ee1eSDan Willemsen e.WriteLiteral("false") 107*1c12ee1eSDan Willemsen } 108*1c12ee1eSDan Willemsen} 109*1c12ee1eSDan Willemsen 110*1c12ee1eSDan Willemsen// WriteString writes out the given string value. 111*1c12ee1eSDan Willemsenfunc (e *Encoder) WriteString(s string) { 112*1c12ee1eSDan Willemsen e.prepareNext(scalar) 113*1c12ee1eSDan Willemsen e.out = appendString(e.out, s, e.outputASCII) 114*1c12ee1eSDan Willemsen} 115*1c12ee1eSDan Willemsen 116*1c12ee1eSDan Willemsenfunc appendString(out []byte, in string, outputASCII bool) []byte { 117*1c12ee1eSDan Willemsen out = append(out, '"') 118*1c12ee1eSDan Willemsen i := indexNeedEscapeInString(in) 119*1c12ee1eSDan Willemsen in, out = in[i:], append(out, in[:i]...) 120*1c12ee1eSDan Willemsen for len(in) > 0 { 121*1c12ee1eSDan Willemsen switch r, n := utf8.DecodeRuneInString(in); { 122*1c12ee1eSDan Willemsen case r == utf8.RuneError && n == 1: 123*1c12ee1eSDan Willemsen // We do not report invalid UTF-8 because strings in the text format 124*1c12ee1eSDan Willemsen // are used to represent both the proto string and bytes type. 125*1c12ee1eSDan Willemsen r = rune(in[0]) 126*1c12ee1eSDan Willemsen fallthrough 127*1c12ee1eSDan Willemsen case r < ' ' || r == '"' || r == '\\' || r == 0x7f: 128*1c12ee1eSDan Willemsen out = append(out, '\\') 129*1c12ee1eSDan Willemsen switch r { 130*1c12ee1eSDan Willemsen case '"', '\\': 131*1c12ee1eSDan Willemsen out = append(out, byte(r)) 132*1c12ee1eSDan Willemsen case '\n': 133*1c12ee1eSDan Willemsen out = append(out, 'n') 134*1c12ee1eSDan Willemsen case '\r': 135*1c12ee1eSDan Willemsen out = append(out, 'r') 136*1c12ee1eSDan Willemsen case '\t': 137*1c12ee1eSDan Willemsen out = append(out, 't') 138*1c12ee1eSDan Willemsen default: 139*1c12ee1eSDan Willemsen out = append(out, 'x') 140*1c12ee1eSDan Willemsen out = append(out, "00"[1+(bits.Len32(uint32(r))-1)/4:]...) 141*1c12ee1eSDan Willemsen out = strconv.AppendUint(out, uint64(r), 16) 142*1c12ee1eSDan Willemsen } 143*1c12ee1eSDan Willemsen in = in[n:] 144*1c12ee1eSDan Willemsen case r >= utf8.RuneSelf && (outputASCII || r <= 0x009f): 145*1c12ee1eSDan Willemsen out = append(out, '\\') 146*1c12ee1eSDan Willemsen if r <= math.MaxUint16 { 147*1c12ee1eSDan Willemsen out = append(out, 'u') 148*1c12ee1eSDan Willemsen out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...) 149*1c12ee1eSDan Willemsen out = strconv.AppendUint(out, uint64(r), 16) 150*1c12ee1eSDan Willemsen } else { 151*1c12ee1eSDan Willemsen out = append(out, 'U') 152*1c12ee1eSDan Willemsen out = append(out, "00000000"[1+(bits.Len32(uint32(r))-1)/4:]...) 153*1c12ee1eSDan Willemsen out = strconv.AppendUint(out, uint64(r), 16) 154*1c12ee1eSDan Willemsen } 155*1c12ee1eSDan Willemsen in = in[n:] 156*1c12ee1eSDan Willemsen default: 157*1c12ee1eSDan Willemsen i := indexNeedEscapeInString(in[n:]) 158*1c12ee1eSDan Willemsen in, out = in[n+i:], append(out, in[:n+i]...) 159*1c12ee1eSDan Willemsen } 160*1c12ee1eSDan Willemsen } 161*1c12ee1eSDan Willemsen out = append(out, '"') 162*1c12ee1eSDan Willemsen return out 163*1c12ee1eSDan Willemsen} 164*1c12ee1eSDan Willemsen 165*1c12ee1eSDan Willemsen// indexNeedEscapeInString returns the index of the character that needs 166*1c12ee1eSDan Willemsen// escaping. If no characters need escaping, this returns the input length. 167*1c12ee1eSDan Willemsenfunc indexNeedEscapeInString(s string) int { 168*1c12ee1eSDan Willemsen for i := 0; i < len(s); i++ { 169*1c12ee1eSDan Willemsen if c := s[i]; c < ' ' || c == '"' || c == '\'' || c == '\\' || c >= 0x7f { 170*1c12ee1eSDan Willemsen return i 171*1c12ee1eSDan Willemsen } 172*1c12ee1eSDan Willemsen } 173*1c12ee1eSDan Willemsen return len(s) 174*1c12ee1eSDan Willemsen} 175*1c12ee1eSDan Willemsen 176*1c12ee1eSDan Willemsen// WriteFloat writes out the given float value for given bitSize. 177*1c12ee1eSDan Willemsenfunc (e *Encoder) WriteFloat(n float64, bitSize int) { 178*1c12ee1eSDan Willemsen e.prepareNext(scalar) 179*1c12ee1eSDan Willemsen e.out = appendFloat(e.out, n, bitSize) 180*1c12ee1eSDan Willemsen} 181*1c12ee1eSDan Willemsen 182*1c12ee1eSDan Willemsenfunc appendFloat(out []byte, n float64, bitSize int) []byte { 183*1c12ee1eSDan Willemsen switch { 184*1c12ee1eSDan Willemsen case math.IsNaN(n): 185*1c12ee1eSDan Willemsen return append(out, "nan"...) 186*1c12ee1eSDan Willemsen case math.IsInf(n, +1): 187*1c12ee1eSDan Willemsen return append(out, "inf"...) 188*1c12ee1eSDan Willemsen case math.IsInf(n, -1): 189*1c12ee1eSDan Willemsen return append(out, "-inf"...) 190*1c12ee1eSDan Willemsen default: 191*1c12ee1eSDan Willemsen return strconv.AppendFloat(out, n, 'g', -1, bitSize) 192*1c12ee1eSDan Willemsen } 193*1c12ee1eSDan Willemsen} 194*1c12ee1eSDan Willemsen 195*1c12ee1eSDan Willemsen// WriteInt writes out the given signed integer value. 196*1c12ee1eSDan Willemsenfunc (e *Encoder) WriteInt(n int64) { 197*1c12ee1eSDan Willemsen e.prepareNext(scalar) 198*1c12ee1eSDan Willemsen e.out = append(e.out, strconv.FormatInt(n, 10)...) 199*1c12ee1eSDan Willemsen} 200*1c12ee1eSDan Willemsen 201*1c12ee1eSDan Willemsen// WriteUint writes out the given unsigned integer value. 202*1c12ee1eSDan Willemsenfunc (e *Encoder) WriteUint(n uint64) { 203*1c12ee1eSDan Willemsen e.prepareNext(scalar) 204*1c12ee1eSDan Willemsen e.out = append(e.out, strconv.FormatUint(n, 10)...) 205*1c12ee1eSDan Willemsen} 206*1c12ee1eSDan Willemsen 207*1c12ee1eSDan Willemsen// WriteLiteral writes out the given string as a literal value without quotes. 208*1c12ee1eSDan Willemsen// This is used for writing enum literal strings. 209*1c12ee1eSDan Willemsenfunc (e *Encoder) WriteLiteral(s string) { 210*1c12ee1eSDan Willemsen e.prepareNext(scalar) 211*1c12ee1eSDan Willemsen e.out = append(e.out, s...) 212*1c12ee1eSDan Willemsen} 213*1c12ee1eSDan Willemsen 214*1c12ee1eSDan Willemsen// prepareNext adds possible space and indentation for the next value based 215*1c12ee1eSDan Willemsen// on last encType and indent option. It also updates e.lastType to next. 216*1c12ee1eSDan Willemsenfunc (e *Encoder) prepareNext(next encType) { 217*1c12ee1eSDan Willemsen defer func() { 218*1c12ee1eSDan Willemsen e.lastType = next 219*1c12ee1eSDan Willemsen }() 220*1c12ee1eSDan Willemsen 221*1c12ee1eSDan Willemsen // Single line. 222*1c12ee1eSDan Willemsen if len(e.indent) == 0 { 223*1c12ee1eSDan Willemsen // Add space after each field before the next one. 224*1c12ee1eSDan Willemsen if e.lastType&(scalar|messageClose) != 0 && next == name { 225*1c12ee1eSDan Willemsen e.out = append(e.out, ' ') 226*1c12ee1eSDan Willemsen // Add a random extra space to make output unstable. 227*1c12ee1eSDan Willemsen if detrand.Bool() { 228*1c12ee1eSDan Willemsen e.out = append(e.out, ' ') 229*1c12ee1eSDan Willemsen } 230*1c12ee1eSDan Willemsen } 231*1c12ee1eSDan Willemsen return 232*1c12ee1eSDan Willemsen } 233*1c12ee1eSDan Willemsen 234*1c12ee1eSDan Willemsen // Multi-line. 235*1c12ee1eSDan Willemsen switch { 236*1c12ee1eSDan Willemsen case e.lastType == name: 237*1c12ee1eSDan Willemsen e.out = append(e.out, ' ') 238*1c12ee1eSDan Willemsen // Add a random extra space after name: to make output unstable. 239*1c12ee1eSDan Willemsen if detrand.Bool() { 240*1c12ee1eSDan Willemsen e.out = append(e.out, ' ') 241*1c12ee1eSDan Willemsen } 242*1c12ee1eSDan Willemsen 243*1c12ee1eSDan Willemsen case e.lastType == messageOpen && next != messageClose: 244*1c12ee1eSDan Willemsen e.indents = append(e.indents, e.indent...) 245*1c12ee1eSDan Willemsen e.out = append(e.out, '\n') 246*1c12ee1eSDan Willemsen e.out = append(e.out, e.indents...) 247*1c12ee1eSDan Willemsen 248*1c12ee1eSDan Willemsen case e.lastType&(scalar|messageClose) != 0: 249*1c12ee1eSDan Willemsen if next == messageClose { 250*1c12ee1eSDan Willemsen e.indents = e.indents[:len(e.indents)-len(e.indent)] 251*1c12ee1eSDan Willemsen } 252*1c12ee1eSDan Willemsen e.out = append(e.out, '\n') 253*1c12ee1eSDan Willemsen e.out = append(e.out, e.indents...) 254*1c12ee1eSDan Willemsen } 255*1c12ee1eSDan Willemsen} 256*1c12ee1eSDan Willemsen 257*1c12ee1eSDan Willemsen// Snapshot returns the current snapshot for use in Reset. 258*1c12ee1eSDan Willemsenfunc (e *Encoder) Snapshot() encoderState { 259*1c12ee1eSDan Willemsen return e.encoderState 260*1c12ee1eSDan Willemsen} 261*1c12ee1eSDan Willemsen 262*1c12ee1eSDan Willemsen// Reset resets the Encoder to the given encoderState from a Snapshot. 263*1c12ee1eSDan Willemsenfunc (e *Encoder) Reset(es encoderState) { 264*1c12ee1eSDan Willemsen e.encoderState = es 265*1c12ee1eSDan Willemsen} 266*1c12ee1eSDan Willemsen 267*1c12ee1eSDan Willemsen// AppendString appends the escaped form of the input string to b. 268*1c12ee1eSDan Willemsenfunc AppendString(b []byte, s string) []byte { 269*1c12ee1eSDan Willemsen return appendString(b, s, false) 270*1c12ee1eSDan Willemsen} 271