1*1c12ee1eSDan Willemsen// Copyright 2018 The Go Authors. All rights reserved. 2*1c12ee1eSDan Willemsen// Use of this source code is governed by a BSD-style 3*1c12ee1eSDan Willemsen// license that can be found in the LICENSE file. 4*1c12ee1eSDan Willemsen 5*1c12ee1eSDan Willemsenpackage json 6*1c12ee1eSDan Willemsen 7*1c12ee1eSDan Willemsenimport ( 8*1c12ee1eSDan Willemsen "math" 9*1c12ee1eSDan Willemsen "math/bits" 10*1c12ee1eSDan Willemsen "strconv" 11*1c12ee1eSDan Willemsen "strings" 12*1c12ee1eSDan Willemsen "unicode/utf8" 13*1c12ee1eSDan Willemsen 14*1c12ee1eSDan Willemsen "google.golang.org/protobuf/internal/detrand" 15*1c12ee1eSDan Willemsen "google.golang.org/protobuf/internal/errors" 16*1c12ee1eSDan Willemsen) 17*1c12ee1eSDan Willemsen 18*1c12ee1eSDan Willemsen// kind represents an encoding type. 19*1c12ee1eSDan Willemsentype kind uint8 20*1c12ee1eSDan Willemsen 21*1c12ee1eSDan Willemsenconst ( 22*1c12ee1eSDan Willemsen _ kind = (1 << iota) / 2 23*1c12ee1eSDan Willemsen name 24*1c12ee1eSDan Willemsen scalar 25*1c12ee1eSDan Willemsen objectOpen 26*1c12ee1eSDan Willemsen objectClose 27*1c12ee1eSDan Willemsen arrayOpen 28*1c12ee1eSDan Willemsen arrayClose 29*1c12ee1eSDan Willemsen) 30*1c12ee1eSDan Willemsen 31*1c12ee1eSDan Willemsen// Encoder provides methods to write out JSON constructs and values. The user is 32*1c12ee1eSDan Willemsen// responsible for producing valid sequences of JSON constructs and values. 33*1c12ee1eSDan Willemsentype Encoder struct { 34*1c12ee1eSDan Willemsen indent string 35*1c12ee1eSDan Willemsen lastKind kind 36*1c12ee1eSDan Willemsen indents []byte 37*1c12ee1eSDan Willemsen out []byte 38*1c12ee1eSDan Willemsen} 39*1c12ee1eSDan Willemsen 40*1c12ee1eSDan Willemsen// NewEncoder returns an Encoder. 41*1c12ee1eSDan Willemsen// 42*1c12ee1eSDan Willemsen// If indent is a non-empty string, it causes every entry for an Array or Object 43*1c12ee1eSDan Willemsen// to be preceded by the indent and trailed by a newline. 44*1c12ee1eSDan Willemsenfunc NewEncoder(indent string) (*Encoder, error) { 45*1c12ee1eSDan Willemsen e := &Encoder{} 46*1c12ee1eSDan Willemsen if len(indent) > 0 { 47*1c12ee1eSDan Willemsen if strings.Trim(indent, " \t") != "" { 48*1c12ee1eSDan Willemsen return nil, errors.New("indent may only be composed of space or tab characters") 49*1c12ee1eSDan Willemsen } 50*1c12ee1eSDan Willemsen e.indent = indent 51*1c12ee1eSDan Willemsen } 52*1c12ee1eSDan Willemsen return e, nil 53*1c12ee1eSDan Willemsen} 54*1c12ee1eSDan Willemsen 55*1c12ee1eSDan Willemsen// Bytes returns the content of the written bytes. 56*1c12ee1eSDan Willemsenfunc (e *Encoder) Bytes() []byte { 57*1c12ee1eSDan Willemsen return e.out 58*1c12ee1eSDan Willemsen} 59*1c12ee1eSDan Willemsen 60*1c12ee1eSDan Willemsen// WriteNull writes out the null value. 61*1c12ee1eSDan Willemsenfunc (e *Encoder) WriteNull() { 62*1c12ee1eSDan Willemsen e.prepareNext(scalar) 63*1c12ee1eSDan Willemsen e.out = append(e.out, "null"...) 64*1c12ee1eSDan Willemsen} 65*1c12ee1eSDan Willemsen 66*1c12ee1eSDan Willemsen// WriteBool writes out the given boolean value. 67*1c12ee1eSDan Willemsenfunc (e *Encoder) WriteBool(b bool) { 68*1c12ee1eSDan Willemsen e.prepareNext(scalar) 69*1c12ee1eSDan Willemsen if b { 70*1c12ee1eSDan Willemsen e.out = append(e.out, "true"...) 71*1c12ee1eSDan Willemsen } else { 72*1c12ee1eSDan Willemsen e.out = append(e.out, "false"...) 73*1c12ee1eSDan Willemsen } 74*1c12ee1eSDan Willemsen} 75*1c12ee1eSDan Willemsen 76*1c12ee1eSDan Willemsen// WriteString writes out the given string in JSON string value. Returns error 77*1c12ee1eSDan Willemsen// if input string contains invalid UTF-8. 78*1c12ee1eSDan Willemsenfunc (e *Encoder) WriteString(s string) error { 79*1c12ee1eSDan Willemsen e.prepareNext(scalar) 80*1c12ee1eSDan Willemsen var err error 81*1c12ee1eSDan Willemsen if e.out, err = appendString(e.out, s); err != nil { 82*1c12ee1eSDan Willemsen return err 83*1c12ee1eSDan Willemsen } 84*1c12ee1eSDan Willemsen return nil 85*1c12ee1eSDan Willemsen} 86*1c12ee1eSDan Willemsen 87*1c12ee1eSDan Willemsen// Sentinel error used for indicating invalid UTF-8. 88*1c12ee1eSDan Willemsenvar errInvalidUTF8 = errors.New("invalid UTF-8") 89*1c12ee1eSDan Willemsen 90*1c12ee1eSDan Willemsenfunc appendString(out []byte, in string) ([]byte, error) { 91*1c12ee1eSDan Willemsen out = append(out, '"') 92*1c12ee1eSDan Willemsen i := indexNeedEscapeInString(in) 93*1c12ee1eSDan Willemsen in, out = in[i:], append(out, in[:i]...) 94*1c12ee1eSDan Willemsen for len(in) > 0 { 95*1c12ee1eSDan Willemsen switch r, n := utf8.DecodeRuneInString(in); { 96*1c12ee1eSDan Willemsen case r == utf8.RuneError && n == 1: 97*1c12ee1eSDan Willemsen return out, errInvalidUTF8 98*1c12ee1eSDan Willemsen case r < ' ' || r == '"' || r == '\\': 99*1c12ee1eSDan Willemsen out = append(out, '\\') 100*1c12ee1eSDan Willemsen switch r { 101*1c12ee1eSDan Willemsen case '"', '\\': 102*1c12ee1eSDan Willemsen out = append(out, byte(r)) 103*1c12ee1eSDan Willemsen case '\b': 104*1c12ee1eSDan Willemsen out = append(out, 'b') 105*1c12ee1eSDan Willemsen case '\f': 106*1c12ee1eSDan Willemsen out = append(out, 'f') 107*1c12ee1eSDan Willemsen case '\n': 108*1c12ee1eSDan Willemsen out = append(out, 'n') 109*1c12ee1eSDan Willemsen case '\r': 110*1c12ee1eSDan Willemsen out = append(out, 'r') 111*1c12ee1eSDan Willemsen case '\t': 112*1c12ee1eSDan Willemsen out = append(out, 't') 113*1c12ee1eSDan Willemsen default: 114*1c12ee1eSDan Willemsen out = append(out, 'u') 115*1c12ee1eSDan Willemsen out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...) 116*1c12ee1eSDan Willemsen out = strconv.AppendUint(out, uint64(r), 16) 117*1c12ee1eSDan Willemsen } 118*1c12ee1eSDan Willemsen in = in[n:] 119*1c12ee1eSDan Willemsen default: 120*1c12ee1eSDan Willemsen i := indexNeedEscapeInString(in[n:]) 121*1c12ee1eSDan Willemsen in, out = in[n+i:], append(out, in[:n+i]...) 122*1c12ee1eSDan Willemsen } 123*1c12ee1eSDan Willemsen } 124*1c12ee1eSDan Willemsen out = append(out, '"') 125*1c12ee1eSDan Willemsen return out, nil 126*1c12ee1eSDan Willemsen} 127*1c12ee1eSDan Willemsen 128*1c12ee1eSDan Willemsen// indexNeedEscapeInString returns the index of the character that needs 129*1c12ee1eSDan Willemsen// escaping. If no characters need escaping, this returns the input length. 130*1c12ee1eSDan Willemsenfunc indexNeedEscapeInString(s string) int { 131*1c12ee1eSDan Willemsen for i, r := range s { 132*1c12ee1eSDan Willemsen if r < ' ' || r == '\\' || r == '"' || r == utf8.RuneError { 133*1c12ee1eSDan Willemsen return i 134*1c12ee1eSDan Willemsen } 135*1c12ee1eSDan Willemsen } 136*1c12ee1eSDan Willemsen return len(s) 137*1c12ee1eSDan Willemsen} 138*1c12ee1eSDan Willemsen 139*1c12ee1eSDan Willemsen// WriteFloat writes out the given float and bitSize in JSON number value. 140*1c12ee1eSDan Willemsenfunc (e *Encoder) WriteFloat(n float64, bitSize int) { 141*1c12ee1eSDan Willemsen e.prepareNext(scalar) 142*1c12ee1eSDan Willemsen e.out = appendFloat(e.out, n, bitSize) 143*1c12ee1eSDan Willemsen} 144*1c12ee1eSDan Willemsen 145*1c12ee1eSDan Willemsen// appendFloat formats given float in bitSize, and appends to the given []byte. 146*1c12ee1eSDan Willemsenfunc appendFloat(out []byte, n float64, bitSize int) []byte { 147*1c12ee1eSDan Willemsen switch { 148*1c12ee1eSDan Willemsen case math.IsNaN(n): 149*1c12ee1eSDan Willemsen return append(out, `"NaN"`...) 150*1c12ee1eSDan Willemsen case math.IsInf(n, +1): 151*1c12ee1eSDan Willemsen return append(out, `"Infinity"`...) 152*1c12ee1eSDan Willemsen case math.IsInf(n, -1): 153*1c12ee1eSDan Willemsen return append(out, `"-Infinity"`...) 154*1c12ee1eSDan Willemsen } 155*1c12ee1eSDan Willemsen 156*1c12ee1eSDan Willemsen // JSON number formatting logic based on encoding/json. 157*1c12ee1eSDan Willemsen // See floatEncoder.encode for reference. 158*1c12ee1eSDan Willemsen fmt := byte('f') 159*1c12ee1eSDan Willemsen if abs := math.Abs(n); abs != 0 { 160*1c12ee1eSDan Willemsen if bitSize == 64 && (abs < 1e-6 || abs >= 1e21) || 161*1c12ee1eSDan Willemsen bitSize == 32 && (float32(abs) < 1e-6 || float32(abs) >= 1e21) { 162*1c12ee1eSDan Willemsen fmt = 'e' 163*1c12ee1eSDan Willemsen } 164*1c12ee1eSDan Willemsen } 165*1c12ee1eSDan Willemsen out = strconv.AppendFloat(out, n, fmt, -1, bitSize) 166*1c12ee1eSDan Willemsen if fmt == 'e' { 167*1c12ee1eSDan Willemsen n := len(out) 168*1c12ee1eSDan Willemsen if n >= 4 && out[n-4] == 'e' && out[n-3] == '-' && out[n-2] == '0' { 169*1c12ee1eSDan Willemsen out[n-2] = out[n-1] 170*1c12ee1eSDan Willemsen out = out[:n-1] 171*1c12ee1eSDan Willemsen } 172*1c12ee1eSDan Willemsen } 173*1c12ee1eSDan Willemsen return out 174*1c12ee1eSDan Willemsen} 175*1c12ee1eSDan Willemsen 176*1c12ee1eSDan Willemsen// WriteInt writes out the given signed integer in JSON number value. 177*1c12ee1eSDan Willemsenfunc (e *Encoder) WriteInt(n int64) { 178*1c12ee1eSDan Willemsen e.prepareNext(scalar) 179*1c12ee1eSDan Willemsen e.out = append(e.out, strconv.FormatInt(n, 10)...) 180*1c12ee1eSDan Willemsen} 181*1c12ee1eSDan Willemsen 182*1c12ee1eSDan Willemsen// WriteUint writes out the given unsigned integer in JSON number value. 183*1c12ee1eSDan Willemsenfunc (e *Encoder) WriteUint(n uint64) { 184*1c12ee1eSDan Willemsen e.prepareNext(scalar) 185*1c12ee1eSDan Willemsen e.out = append(e.out, strconv.FormatUint(n, 10)...) 186*1c12ee1eSDan Willemsen} 187*1c12ee1eSDan Willemsen 188*1c12ee1eSDan Willemsen// StartObject writes out the '{' symbol. 189*1c12ee1eSDan Willemsenfunc (e *Encoder) StartObject() { 190*1c12ee1eSDan Willemsen e.prepareNext(objectOpen) 191*1c12ee1eSDan Willemsen e.out = append(e.out, '{') 192*1c12ee1eSDan Willemsen} 193*1c12ee1eSDan Willemsen 194*1c12ee1eSDan Willemsen// EndObject writes out the '}' symbol. 195*1c12ee1eSDan Willemsenfunc (e *Encoder) EndObject() { 196*1c12ee1eSDan Willemsen e.prepareNext(objectClose) 197*1c12ee1eSDan Willemsen e.out = append(e.out, '}') 198*1c12ee1eSDan Willemsen} 199*1c12ee1eSDan Willemsen 200*1c12ee1eSDan Willemsen// WriteName writes out the given string in JSON string value and the name 201*1c12ee1eSDan Willemsen// separator ':'. Returns error if input string contains invalid UTF-8, which 202*1c12ee1eSDan Willemsen// should not be likely as protobuf field names should be valid. 203*1c12ee1eSDan Willemsenfunc (e *Encoder) WriteName(s string) error { 204*1c12ee1eSDan Willemsen e.prepareNext(name) 205*1c12ee1eSDan Willemsen var err error 206*1c12ee1eSDan Willemsen // Append to output regardless of error. 207*1c12ee1eSDan Willemsen e.out, err = appendString(e.out, s) 208*1c12ee1eSDan Willemsen e.out = append(e.out, ':') 209*1c12ee1eSDan Willemsen return err 210*1c12ee1eSDan Willemsen} 211*1c12ee1eSDan Willemsen 212*1c12ee1eSDan Willemsen// StartArray writes out the '[' symbol. 213*1c12ee1eSDan Willemsenfunc (e *Encoder) StartArray() { 214*1c12ee1eSDan Willemsen e.prepareNext(arrayOpen) 215*1c12ee1eSDan Willemsen e.out = append(e.out, '[') 216*1c12ee1eSDan Willemsen} 217*1c12ee1eSDan Willemsen 218*1c12ee1eSDan Willemsen// EndArray writes out the ']' symbol. 219*1c12ee1eSDan Willemsenfunc (e *Encoder) EndArray() { 220*1c12ee1eSDan Willemsen e.prepareNext(arrayClose) 221*1c12ee1eSDan Willemsen e.out = append(e.out, ']') 222*1c12ee1eSDan Willemsen} 223*1c12ee1eSDan Willemsen 224*1c12ee1eSDan Willemsen// prepareNext adds possible comma and indentation for the next value based 225*1c12ee1eSDan Willemsen// on last type and indent option. It also updates lastKind to next. 226*1c12ee1eSDan Willemsenfunc (e *Encoder) prepareNext(next kind) { 227*1c12ee1eSDan Willemsen defer func() { 228*1c12ee1eSDan Willemsen // Set lastKind to next. 229*1c12ee1eSDan Willemsen e.lastKind = next 230*1c12ee1eSDan Willemsen }() 231*1c12ee1eSDan Willemsen 232*1c12ee1eSDan Willemsen if len(e.indent) == 0 { 233*1c12ee1eSDan Willemsen // Need to add comma on the following condition. 234*1c12ee1eSDan Willemsen if e.lastKind&(scalar|objectClose|arrayClose) != 0 && 235*1c12ee1eSDan Willemsen next&(name|scalar|objectOpen|arrayOpen) != 0 { 236*1c12ee1eSDan Willemsen e.out = append(e.out, ',') 237*1c12ee1eSDan Willemsen // For single-line output, add a random extra space after each 238*1c12ee1eSDan Willemsen // comma to make output unstable. 239*1c12ee1eSDan Willemsen if detrand.Bool() { 240*1c12ee1eSDan Willemsen e.out = append(e.out, ' ') 241*1c12ee1eSDan Willemsen } 242*1c12ee1eSDan Willemsen } 243*1c12ee1eSDan Willemsen return 244*1c12ee1eSDan Willemsen } 245*1c12ee1eSDan Willemsen 246*1c12ee1eSDan Willemsen switch { 247*1c12ee1eSDan Willemsen case e.lastKind&(objectOpen|arrayOpen) != 0: 248*1c12ee1eSDan Willemsen // If next type is NOT closing, add indent and newline. 249*1c12ee1eSDan Willemsen if next&(objectClose|arrayClose) == 0 { 250*1c12ee1eSDan Willemsen e.indents = append(e.indents, e.indent...) 251*1c12ee1eSDan Willemsen e.out = append(e.out, '\n') 252*1c12ee1eSDan Willemsen e.out = append(e.out, e.indents...) 253*1c12ee1eSDan Willemsen } 254*1c12ee1eSDan Willemsen 255*1c12ee1eSDan Willemsen case e.lastKind&(scalar|objectClose|arrayClose) != 0: 256*1c12ee1eSDan Willemsen switch { 257*1c12ee1eSDan Willemsen // If next type is either a value or name, add comma and newline. 258*1c12ee1eSDan Willemsen case next&(name|scalar|objectOpen|arrayOpen) != 0: 259*1c12ee1eSDan Willemsen e.out = append(e.out, ',', '\n') 260*1c12ee1eSDan Willemsen 261*1c12ee1eSDan Willemsen // If next type is a closing object or array, adjust indentation. 262*1c12ee1eSDan Willemsen case next&(objectClose|arrayClose) != 0: 263*1c12ee1eSDan Willemsen e.indents = e.indents[:len(e.indents)-len(e.indent)] 264*1c12ee1eSDan Willemsen e.out = append(e.out, '\n') 265*1c12ee1eSDan Willemsen } 266*1c12ee1eSDan Willemsen e.out = append(e.out, e.indents...) 267*1c12ee1eSDan Willemsen 268*1c12ee1eSDan Willemsen case e.lastKind&name != 0: 269*1c12ee1eSDan Willemsen e.out = append(e.out, ' ') 270*1c12ee1eSDan Willemsen // For multi-line output, add a random extra space after key: to make 271*1c12ee1eSDan Willemsen // output unstable. 272*1c12ee1eSDan Willemsen if detrand.Bool() { 273*1c12ee1eSDan Willemsen e.out = append(e.out, ' ') 274*1c12ee1eSDan Willemsen } 275*1c12ee1eSDan Willemsen } 276*1c12ee1eSDan Willemsen} 277