xref: /aosp_15_r20/external/golang-protobuf/internal/encoding/json/encode.go (revision 1c12ee1efe575feb122dbf939ff15148a3b3e8f2)
1*1c12ee1eSDan Willemsen// Copyright 2018 The Go Authors. All rights reserved.
2*1c12ee1eSDan Willemsen// Use of this source code is governed by a BSD-style
3*1c12ee1eSDan Willemsen// license that can be found in the LICENSE file.
4*1c12ee1eSDan Willemsen
5*1c12ee1eSDan Willemsenpackage json
6*1c12ee1eSDan Willemsen
7*1c12ee1eSDan Willemsenimport (
8*1c12ee1eSDan Willemsen	"math"
9*1c12ee1eSDan Willemsen	"math/bits"
10*1c12ee1eSDan Willemsen	"strconv"
11*1c12ee1eSDan Willemsen	"strings"
12*1c12ee1eSDan Willemsen	"unicode/utf8"
13*1c12ee1eSDan Willemsen
14*1c12ee1eSDan Willemsen	"google.golang.org/protobuf/internal/detrand"
15*1c12ee1eSDan Willemsen	"google.golang.org/protobuf/internal/errors"
16*1c12ee1eSDan Willemsen)
17*1c12ee1eSDan Willemsen
// kind classifies a JSON construct written by the Encoder. Every kind occupies
// a distinct bit so that a group of kinds can be tested with a single mask.
type kind uint8

const (
	name kind = 1 << iota
	scalar
	objectOpen
	objectClose
	arrayOpen
	arrayClose
)
30*1c12ee1eSDan Willemsen
// Encoder provides methods to write out JSON constructs and values. The user is
// responsible for producing valid sequences of JSON constructs and values.
//
// Fields:
//   - indent is the indentation unit set by NewEncoder; when it is empty the
//     output is written on a single line.
//   - lastKind is the kind of the construct most recently written; it is
//     updated by prepareNext on every write.
//   - indents is the indentation for the current nesting depth; prepareNext
//     grows and shrinks it by one indent unit as containers open and close.
//   - out is the accumulated output, as returned by Bytes.
type Encoder struct {
	indent   string
	lastKind kind
	indents  []byte
	out      []byte
}
39*1c12ee1eSDan Willemsen
40*1c12ee1eSDan Willemsen// NewEncoder returns an Encoder.
41*1c12ee1eSDan Willemsen//
42*1c12ee1eSDan Willemsen// If indent is a non-empty string, it causes every entry for an Array or Object
43*1c12ee1eSDan Willemsen// to be preceded by the indent and trailed by a newline.
44*1c12ee1eSDan Willemsenfunc NewEncoder(indent string) (*Encoder, error) {
45*1c12ee1eSDan Willemsen	e := &Encoder{}
46*1c12ee1eSDan Willemsen	if len(indent) > 0 {
47*1c12ee1eSDan Willemsen		if strings.Trim(indent, " \t") != "" {
48*1c12ee1eSDan Willemsen			return nil, errors.New("indent may only be composed of space or tab characters")
49*1c12ee1eSDan Willemsen		}
50*1c12ee1eSDan Willemsen		e.indent = indent
51*1c12ee1eSDan Willemsen	}
52*1c12ee1eSDan Willemsen	return e, nil
53*1c12ee1eSDan Willemsen}
54*1c12ee1eSDan Willemsen
// Bytes returns the bytes written so far. The returned slice is the Encoder's
// own output buffer, not a copy.
func (e *Encoder) Bytes() []byte {
	return e.out
}
59*1c12ee1eSDan Willemsen
60*1c12ee1eSDan Willemsen// WriteNull writes out the null value.
61*1c12ee1eSDan Willemsenfunc (e *Encoder) WriteNull() {
62*1c12ee1eSDan Willemsen	e.prepareNext(scalar)
63*1c12ee1eSDan Willemsen	e.out = append(e.out, "null"...)
64*1c12ee1eSDan Willemsen}
65*1c12ee1eSDan Willemsen
66*1c12ee1eSDan Willemsen// WriteBool writes out the given boolean value.
67*1c12ee1eSDan Willemsenfunc (e *Encoder) WriteBool(b bool) {
68*1c12ee1eSDan Willemsen	e.prepareNext(scalar)
69*1c12ee1eSDan Willemsen	if b {
70*1c12ee1eSDan Willemsen		e.out = append(e.out, "true"...)
71*1c12ee1eSDan Willemsen	} else {
72*1c12ee1eSDan Willemsen		e.out = append(e.out, "false"...)
73*1c12ee1eSDan Willemsen	}
74*1c12ee1eSDan Willemsen}
75*1c12ee1eSDan Willemsen
76*1c12ee1eSDan Willemsen// WriteString writes out the given string in JSON string value. Returns error
77*1c12ee1eSDan Willemsen// if input string contains invalid UTF-8.
78*1c12ee1eSDan Willemsenfunc (e *Encoder) WriteString(s string) error {
79*1c12ee1eSDan Willemsen	e.prepareNext(scalar)
80*1c12ee1eSDan Willemsen	var err error
81*1c12ee1eSDan Willemsen	if e.out, err = appendString(e.out, s); err != nil {
82*1c12ee1eSDan Willemsen		return err
83*1c12ee1eSDan Willemsen	}
84*1c12ee1eSDan Willemsen	return nil
85*1c12ee1eSDan Willemsen}
86*1c12ee1eSDan Willemsen
// errInvalidUTF8 is the sentinel error returned by appendString when its input
// contains a byte sequence that is not valid UTF-8.
var errInvalidUTF8 = errors.New("invalid UTF-8")
89*1c12ee1eSDan Willemsen
90*1c12ee1eSDan Willemsenfunc appendString(out []byte, in string) ([]byte, error) {
91*1c12ee1eSDan Willemsen	out = append(out, '"')
92*1c12ee1eSDan Willemsen	i := indexNeedEscapeInString(in)
93*1c12ee1eSDan Willemsen	in, out = in[i:], append(out, in[:i]...)
94*1c12ee1eSDan Willemsen	for len(in) > 0 {
95*1c12ee1eSDan Willemsen		switch r, n := utf8.DecodeRuneInString(in); {
96*1c12ee1eSDan Willemsen		case r == utf8.RuneError && n == 1:
97*1c12ee1eSDan Willemsen			return out, errInvalidUTF8
98*1c12ee1eSDan Willemsen		case r < ' ' || r == '"' || r == '\\':
99*1c12ee1eSDan Willemsen			out = append(out, '\\')
100*1c12ee1eSDan Willemsen			switch r {
101*1c12ee1eSDan Willemsen			case '"', '\\':
102*1c12ee1eSDan Willemsen				out = append(out, byte(r))
103*1c12ee1eSDan Willemsen			case '\b':
104*1c12ee1eSDan Willemsen				out = append(out, 'b')
105*1c12ee1eSDan Willemsen			case '\f':
106*1c12ee1eSDan Willemsen				out = append(out, 'f')
107*1c12ee1eSDan Willemsen			case '\n':
108*1c12ee1eSDan Willemsen				out = append(out, 'n')
109*1c12ee1eSDan Willemsen			case '\r':
110*1c12ee1eSDan Willemsen				out = append(out, 'r')
111*1c12ee1eSDan Willemsen			case '\t':
112*1c12ee1eSDan Willemsen				out = append(out, 't')
113*1c12ee1eSDan Willemsen			default:
114*1c12ee1eSDan Willemsen				out = append(out, 'u')
115*1c12ee1eSDan Willemsen				out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...)
116*1c12ee1eSDan Willemsen				out = strconv.AppendUint(out, uint64(r), 16)
117*1c12ee1eSDan Willemsen			}
118*1c12ee1eSDan Willemsen			in = in[n:]
119*1c12ee1eSDan Willemsen		default:
120*1c12ee1eSDan Willemsen			i := indexNeedEscapeInString(in[n:])
121*1c12ee1eSDan Willemsen			in, out = in[n+i:], append(out, in[:n+i]...)
122*1c12ee1eSDan Willemsen		}
123*1c12ee1eSDan Willemsen	}
124*1c12ee1eSDan Willemsen	out = append(out, '"')
125*1c12ee1eSDan Willemsen	return out, nil
126*1c12ee1eSDan Willemsen}
127*1c12ee1eSDan Willemsen
// indexNeedEscapeInString reports the byte offset of the first rune in s that
// cannot be copied verbatim into a JSON string: a control character below
// U+0020, a backslash, a double quote, or utf8.RuneError (which covers both
// invalid encodings and a literal U+FFFD). It returns len(s) when there is no
// such rune.
func indexNeedEscapeInString(s string) int {
	for pos := 0; pos < len(s); {
		r, size := utf8.DecodeRuneInString(s[pos:])
		switch {
		case r < ' ', r == '\\', r == '"', r == utf8.RuneError:
			return pos
		}
		pos += size
	}
	return len(s)
}
138*1c12ee1eSDan Willemsen
// WriteFloat writes out the given float as a JSON number value, formatted by
// appendFloat for a float of the given bitSize. NaN and the infinities are
// written as the JSON strings "NaN", "Infinity" and "-Infinity" instead.
func (e *Encoder) WriteFloat(n float64, bitSize int) {
	e.prepareNext(scalar)
	e.out = appendFloat(e.out, n, bitSize)
}
144*1c12ee1eSDan Willemsen
// appendFloat appends the JSON representation of n, treated as a float of
// bitSize bits, to out and returns the extended slice.
//
// NaN and the infinities are written as the quoted strings "NaN", "Infinity"
// and "-Infinity". Finite values use the shortest decimal form that
// round-trips, switching to exponent notation when the magnitude is below 1e-6
// or at least 1e21.
func appendFloat(out []byte, n float64, bitSize int) []byte {
	if math.IsNaN(n) {
		return append(out, `"NaN"`...)
	}
	if math.IsInf(n, +1) {
		return append(out, `"Infinity"`...)
	}
	if math.IsInf(n, -1) {
		return append(out, `"-Infinity"`...)
	}

	// The format selection mirrors encoding/json (see floatEncoder.encode).
	format := byte('f')
	if mag := math.Abs(n); mag != 0 {
		useExponent := false
		switch bitSize {
		case 64:
			useExponent = mag < 1e-6 || mag >= 1e21
		case 32:
			useExponent = float32(mag) < 1e-6 || float32(mag) >= 1e21
		}
		if useExponent {
			format = 'e'
		}
	}

	out = strconv.AppendFloat(out, n, format, -1, bitSize)
	if format != 'e' {
		return out
	}

	// Drop the leading zero of a two-digit negative exponent: "e-09" -> "e-9".
	if end := len(out); end >= 4 && out[end-4] == 'e' && out[end-3] == '-' && out[end-2] == '0' {
		out[end-2] = out[end-1]
		out = out[:end-1]
	}
	return out
}
175*1c12ee1eSDan Willemsen
176*1c12ee1eSDan Willemsen// WriteInt writes out the given signed integer in JSON number value.
177*1c12ee1eSDan Willemsenfunc (e *Encoder) WriteInt(n int64) {
178*1c12ee1eSDan Willemsen	e.prepareNext(scalar)
179*1c12ee1eSDan Willemsen	e.out = append(e.out, strconv.FormatInt(n, 10)...)
180*1c12ee1eSDan Willemsen}
181*1c12ee1eSDan Willemsen
182*1c12ee1eSDan Willemsen// WriteUint writes out the given unsigned integer in JSON number value.
183*1c12ee1eSDan Willemsenfunc (e *Encoder) WriteUint(n uint64) {
184*1c12ee1eSDan Willemsen	e.prepareNext(scalar)
185*1c12ee1eSDan Willemsen	e.out = append(e.out, strconv.FormatUint(n, 10)...)
186*1c12ee1eSDan Willemsen}
187*1c12ee1eSDan Willemsen
188*1c12ee1eSDan Willemsen// StartObject writes out the '{' symbol.
189*1c12ee1eSDan Willemsenfunc (e *Encoder) StartObject() {
190*1c12ee1eSDan Willemsen	e.prepareNext(objectOpen)
191*1c12ee1eSDan Willemsen	e.out = append(e.out, '{')
192*1c12ee1eSDan Willemsen}
193*1c12ee1eSDan Willemsen
194*1c12ee1eSDan Willemsen// EndObject writes out the '}' symbol.
195*1c12ee1eSDan Willemsenfunc (e *Encoder) EndObject() {
196*1c12ee1eSDan Willemsen	e.prepareNext(objectClose)
197*1c12ee1eSDan Willemsen	e.out = append(e.out, '}')
198*1c12ee1eSDan Willemsen}
199*1c12ee1eSDan Willemsen
200*1c12ee1eSDan Willemsen// WriteName writes out the given string in JSON string value and the name
201*1c12ee1eSDan Willemsen// separator ':'. Returns error if input string contains invalid UTF-8, which
202*1c12ee1eSDan Willemsen// should not be likely as protobuf field names should be valid.
203*1c12ee1eSDan Willemsenfunc (e *Encoder) WriteName(s string) error {
204*1c12ee1eSDan Willemsen	e.prepareNext(name)
205*1c12ee1eSDan Willemsen	var err error
206*1c12ee1eSDan Willemsen	// Append to output regardless of error.
207*1c12ee1eSDan Willemsen	e.out, err = appendString(e.out, s)
208*1c12ee1eSDan Willemsen	e.out = append(e.out, ':')
209*1c12ee1eSDan Willemsen	return err
210*1c12ee1eSDan Willemsen}
211*1c12ee1eSDan Willemsen
212*1c12ee1eSDan Willemsen// StartArray writes out the '[' symbol.
213*1c12ee1eSDan Willemsenfunc (e *Encoder) StartArray() {
214*1c12ee1eSDan Willemsen	e.prepareNext(arrayOpen)
215*1c12ee1eSDan Willemsen	e.out = append(e.out, '[')
216*1c12ee1eSDan Willemsen}
217*1c12ee1eSDan Willemsen
218*1c12ee1eSDan Willemsen// EndArray writes out the ']' symbol.
219*1c12ee1eSDan Willemsenfunc (e *Encoder) EndArray() {
220*1c12ee1eSDan Willemsen	e.prepareNext(arrayClose)
221*1c12ee1eSDan Willemsen	e.out = append(e.out, ']')
222*1c12ee1eSDan Willemsen}
223*1c12ee1eSDan Willemsen
224*1c12ee1eSDan Willemsen// prepareNext adds possible comma and indentation for the next value based
225*1c12ee1eSDan Willemsen// on last type and indent option. It also updates lastKind to next.
226*1c12ee1eSDan Willemsenfunc (e *Encoder) prepareNext(next kind) {
227*1c12ee1eSDan Willemsen	defer func() {
228*1c12ee1eSDan Willemsen		// Set lastKind to next.
229*1c12ee1eSDan Willemsen		e.lastKind = next
230*1c12ee1eSDan Willemsen	}()
231*1c12ee1eSDan Willemsen
232*1c12ee1eSDan Willemsen	if len(e.indent) == 0 {
233*1c12ee1eSDan Willemsen		// Need to add comma on the following condition.
234*1c12ee1eSDan Willemsen		if e.lastKind&(scalar|objectClose|arrayClose) != 0 &&
235*1c12ee1eSDan Willemsen			next&(name|scalar|objectOpen|arrayOpen) != 0 {
236*1c12ee1eSDan Willemsen			e.out = append(e.out, ',')
237*1c12ee1eSDan Willemsen			// For single-line output, add a random extra space after each
238*1c12ee1eSDan Willemsen			// comma to make output unstable.
239*1c12ee1eSDan Willemsen			if detrand.Bool() {
240*1c12ee1eSDan Willemsen				e.out = append(e.out, ' ')
241*1c12ee1eSDan Willemsen			}
242*1c12ee1eSDan Willemsen		}
243*1c12ee1eSDan Willemsen		return
244*1c12ee1eSDan Willemsen	}
245*1c12ee1eSDan Willemsen
246*1c12ee1eSDan Willemsen	switch {
247*1c12ee1eSDan Willemsen	case e.lastKind&(objectOpen|arrayOpen) != 0:
248*1c12ee1eSDan Willemsen		// If next type is NOT closing, add indent and newline.
249*1c12ee1eSDan Willemsen		if next&(objectClose|arrayClose) == 0 {
250*1c12ee1eSDan Willemsen			e.indents = append(e.indents, e.indent...)
251*1c12ee1eSDan Willemsen			e.out = append(e.out, '\n')
252*1c12ee1eSDan Willemsen			e.out = append(e.out, e.indents...)
253*1c12ee1eSDan Willemsen		}
254*1c12ee1eSDan Willemsen
255*1c12ee1eSDan Willemsen	case e.lastKind&(scalar|objectClose|arrayClose) != 0:
256*1c12ee1eSDan Willemsen		switch {
257*1c12ee1eSDan Willemsen		// If next type is either a value or name, add comma and newline.
258*1c12ee1eSDan Willemsen		case next&(name|scalar|objectOpen|arrayOpen) != 0:
259*1c12ee1eSDan Willemsen			e.out = append(e.out, ',', '\n')
260*1c12ee1eSDan Willemsen
261*1c12ee1eSDan Willemsen		// If next type is a closing object or array, adjust indentation.
262*1c12ee1eSDan Willemsen		case next&(objectClose|arrayClose) != 0:
263*1c12ee1eSDan Willemsen			e.indents = e.indents[:len(e.indents)-len(e.indent)]
264*1c12ee1eSDan Willemsen			e.out = append(e.out, '\n')
265*1c12ee1eSDan Willemsen		}
266*1c12ee1eSDan Willemsen		e.out = append(e.out, e.indents...)
267*1c12ee1eSDan Willemsen
268*1c12ee1eSDan Willemsen	case e.lastKind&name != 0:
269*1c12ee1eSDan Willemsen		e.out = append(e.out, ' ')
270*1c12ee1eSDan Willemsen		// For multi-line output, add a random extra space after key: to make
271*1c12ee1eSDan Willemsen		// output unstable.
272*1c12ee1eSDan Willemsen		if detrand.Bool() {
273*1c12ee1eSDan Willemsen			e.out = append(e.out, ' ')
274*1c12ee1eSDan Willemsen		}
275*1c12ee1eSDan Willemsen	}
276*1c12ee1eSDan Willemsen}
277