xref: /aosp_15_r20/external/golang-protobuf/internal/encoding/text/encode.go (revision 1c12ee1efe575feb122dbf939ff15148a3b3e8f2)
1*1c12ee1eSDan Willemsen// Copyright 2018 The Go Authors. All rights reserved.
2*1c12ee1eSDan Willemsen// Use of this source code is governed by a BSD-style
3*1c12ee1eSDan Willemsen// license that can be found in the LICENSE file.
4*1c12ee1eSDan Willemsen
5*1c12ee1eSDan Willemsenpackage text
6*1c12ee1eSDan Willemsen
7*1c12ee1eSDan Willemsenimport (
8*1c12ee1eSDan Willemsen	"math"
9*1c12ee1eSDan Willemsen	"math/bits"
10*1c12ee1eSDan Willemsen	"strconv"
11*1c12ee1eSDan Willemsen	"strings"
12*1c12ee1eSDan Willemsen	"unicode/utf8"
13*1c12ee1eSDan Willemsen
14*1c12ee1eSDan Willemsen	"google.golang.org/protobuf/internal/detrand"
15*1c12ee1eSDan Willemsen	"google.golang.org/protobuf/internal/errors"
16*1c12ee1eSDan Willemsen)
17*1c12ee1eSDan Willemsen
// encType identifies the kind of construct an Encoder wrote most recently.
// Every named kind occupies a distinct bit so that several kinds can be
// tested at once with a mask such as scalar|messageClose.
type encType uint8

// The zero value of encType is intentionally left unnamed; it matches none of
// the kinds below.
const (
	name         encType = 1 << iota // 0b0001
	scalar                           // 0b0010
	messageOpen                      // 0b0100
	messageClose                     // 0b1000
)
28*1c12ee1eSDan Willemsen
// Encoder provides methods to write out textproto constructs and values. The user is
// responsible for producing valid sequences of constructs and values.
type Encoder struct {
	// encoderState is the mutable output state. It is embedded as a unit so
	// that Snapshot can copy it out and Reset can put it back.
	encoderState

	indent      string  // per-level indentation; empty selects single-line output
	delims      [2]byte // message delimiters: [0] opens, [1] closes
	outputASCII bool    // passed to appendString to request escaping of non-ASCII runes
}
38*1c12ee1eSDan Willemsen
// encoderState is the part of an Encoder that changes while writing. It is
// the value handed out by Snapshot and accepted by Reset.
type encoderState struct {
	lastType encType // kind of the construct written most recently
	indents  []byte  // indentation accumulated for the current nesting depth
	out      []byte  // encoded output produced so far
}
44*1c12ee1eSDan Willemsen
45*1c12ee1eSDan Willemsen// NewEncoder returns an Encoder.
46*1c12ee1eSDan Willemsen//
47*1c12ee1eSDan Willemsen// If indent is a non-empty string, it causes every entry in a List or Message
48*1c12ee1eSDan Willemsen// to be preceded by the indent and trailed by a newline.
49*1c12ee1eSDan Willemsen//
50*1c12ee1eSDan Willemsen// If delims is not the zero value, it controls the delimiter characters used
51*1c12ee1eSDan Willemsen// for messages (e.g., "{}" vs "<>").
52*1c12ee1eSDan Willemsen//
53*1c12ee1eSDan Willemsen// If outputASCII is true, strings will be serialized in such a way that
54*1c12ee1eSDan Willemsen// multi-byte UTF-8 sequences are escaped. This property ensures that the
55*1c12ee1eSDan Willemsen// overall output is ASCII (as opposed to UTF-8).
56*1c12ee1eSDan Willemsenfunc NewEncoder(indent string, delims [2]byte, outputASCII bool) (*Encoder, error) {
57*1c12ee1eSDan Willemsen	e := &Encoder{}
58*1c12ee1eSDan Willemsen	if len(indent) > 0 {
59*1c12ee1eSDan Willemsen		if strings.Trim(indent, " \t") != "" {
60*1c12ee1eSDan Willemsen			return nil, errors.New("indent may only be composed of space and tab characters")
61*1c12ee1eSDan Willemsen		}
62*1c12ee1eSDan Willemsen		e.indent = indent
63*1c12ee1eSDan Willemsen	}
64*1c12ee1eSDan Willemsen	switch delims {
65*1c12ee1eSDan Willemsen	case [2]byte{0, 0}:
66*1c12ee1eSDan Willemsen		e.delims = [2]byte{'{', '}'}
67*1c12ee1eSDan Willemsen	case [2]byte{'{', '}'}, [2]byte{'<', '>'}:
68*1c12ee1eSDan Willemsen		e.delims = delims
69*1c12ee1eSDan Willemsen	default:
70*1c12ee1eSDan Willemsen		return nil, errors.New("delimiters may only be \"{}\" or \"<>\"")
71*1c12ee1eSDan Willemsen	}
72*1c12ee1eSDan Willemsen	e.outputASCII = outputASCII
73*1c12ee1eSDan Willemsen
74*1c12ee1eSDan Willemsen	return e, nil
75*1c12ee1eSDan Willemsen}
76*1c12ee1eSDan Willemsen
// Bytes returns the content of the written bytes.
//
// The returned slice is the Encoder's own output buffer, not a copy, so it
// shares its backing array with the Encoder.
func (e *Encoder) Bytes() []byte {
	return e.out
}
81*1c12ee1eSDan Willemsen
82*1c12ee1eSDan Willemsen// StartMessage writes out the '{' or '<' symbol.
83*1c12ee1eSDan Willemsenfunc (e *Encoder) StartMessage() {
84*1c12ee1eSDan Willemsen	e.prepareNext(messageOpen)
85*1c12ee1eSDan Willemsen	e.out = append(e.out, e.delims[0])
86*1c12ee1eSDan Willemsen}
87*1c12ee1eSDan Willemsen
88*1c12ee1eSDan Willemsen// EndMessage writes out the '}' or '>' symbol.
89*1c12ee1eSDan Willemsenfunc (e *Encoder) EndMessage() {
90*1c12ee1eSDan Willemsen	e.prepareNext(messageClose)
91*1c12ee1eSDan Willemsen	e.out = append(e.out, e.delims[1])
92*1c12ee1eSDan Willemsen}
93*1c12ee1eSDan Willemsen
94*1c12ee1eSDan Willemsen// WriteName writes out the field name and the separator ':'.
95*1c12ee1eSDan Willemsenfunc (e *Encoder) WriteName(s string) {
96*1c12ee1eSDan Willemsen	e.prepareNext(name)
97*1c12ee1eSDan Willemsen	e.out = append(e.out, s...)
98*1c12ee1eSDan Willemsen	e.out = append(e.out, ':')
99*1c12ee1eSDan Willemsen}
100*1c12ee1eSDan Willemsen
101*1c12ee1eSDan Willemsen// WriteBool writes out the given boolean value.
102*1c12ee1eSDan Willemsenfunc (e *Encoder) WriteBool(b bool) {
103*1c12ee1eSDan Willemsen	if b {
104*1c12ee1eSDan Willemsen		e.WriteLiteral("true")
105*1c12ee1eSDan Willemsen	} else {
106*1c12ee1eSDan Willemsen		e.WriteLiteral("false")
107*1c12ee1eSDan Willemsen	}
108*1c12ee1eSDan Willemsen}
109*1c12ee1eSDan Willemsen
110*1c12ee1eSDan Willemsen// WriteString writes out the given string value.
111*1c12ee1eSDan Willemsenfunc (e *Encoder) WriteString(s string) {
112*1c12ee1eSDan Willemsen	e.prepareNext(scalar)
113*1c12ee1eSDan Willemsen	e.out = appendString(e.out, s, e.outputASCII)
114*1c12ee1eSDan Willemsen}
115*1c12ee1eSDan Willemsen
116*1c12ee1eSDan Willemsenfunc appendString(out []byte, in string, outputASCII bool) []byte {
117*1c12ee1eSDan Willemsen	out = append(out, '"')
118*1c12ee1eSDan Willemsen	i := indexNeedEscapeInString(in)
119*1c12ee1eSDan Willemsen	in, out = in[i:], append(out, in[:i]...)
120*1c12ee1eSDan Willemsen	for len(in) > 0 {
121*1c12ee1eSDan Willemsen		switch r, n := utf8.DecodeRuneInString(in); {
122*1c12ee1eSDan Willemsen		case r == utf8.RuneError && n == 1:
123*1c12ee1eSDan Willemsen			// We do not report invalid UTF-8 because strings in the text format
124*1c12ee1eSDan Willemsen			// are used to represent both the proto string and bytes type.
125*1c12ee1eSDan Willemsen			r = rune(in[0])
126*1c12ee1eSDan Willemsen			fallthrough
127*1c12ee1eSDan Willemsen		case r < ' ' || r == '"' || r == '\\' || r == 0x7f:
128*1c12ee1eSDan Willemsen			out = append(out, '\\')
129*1c12ee1eSDan Willemsen			switch r {
130*1c12ee1eSDan Willemsen			case '"', '\\':
131*1c12ee1eSDan Willemsen				out = append(out, byte(r))
132*1c12ee1eSDan Willemsen			case '\n':
133*1c12ee1eSDan Willemsen				out = append(out, 'n')
134*1c12ee1eSDan Willemsen			case '\r':
135*1c12ee1eSDan Willemsen				out = append(out, 'r')
136*1c12ee1eSDan Willemsen			case '\t':
137*1c12ee1eSDan Willemsen				out = append(out, 't')
138*1c12ee1eSDan Willemsen			default:
139*1c12ee1eSDan Willemsen				out = append(out, 'x')
140*1c12ee1eSDan Willemsen				out = append(out, "00"[1+(bits.Len32(uint32(r))-1)/4:]...)
141*1c12ee1eSDan Willemsen				out = strconv.AppendUint(out, uint64(r), 16)
142*1c12ee1eSDan Willemsen			}
143*1c12ee1eSDan Willemsen			in = in[n:]
144*1c12ee1eSDan Willemsen		case r >= utf8.RuneSelf && (outputASCII || r <= 0x009f):
145*1c12ee1eSDan Willemsen			out = append(out, '\\')
146*1c12ee1eSDan Willemsen			if r <= math.MaxUint16 {
147*1c12ee1eSDan Willemsen				out = append(out, 'u')
148*1c12ee1eSDan Willemsen				out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...)
149*1c12ee1eSDan Willemsen				out = strconv.AppendUint(out, uint64(r), 16)
150*1c12ee1eSDan Willemsen			} else {
151*1c12ee1eSDan Willemsen				out = append(out, 'U')
152*1c12ee1eSDan Willemsen				out = append(out, "00000000"[1+(bits.Len32(uint32(r))-1)/4:]...)
153*1c12ee1eSDan Willemsen				out = strconv.AppendUint(out, uint64(r), 16)
154*1c12ee1eSDan Willemsen			}
155*1c12ee1eSDan Willemsen			in = in[n:]
156*1c12ee1eSDan Willemsen		default:
157*1c12ee1eSDan Willemsen			i := indexNeedEscapeInString(in[n:])
158*1c12ee1eSDan Willemsen			in, out = in[n+i:], append(out, in[:n+i]...)
159*1c12ee1eSDan Willemsen		}
160*1c12ee1eSDan Willemsen	}
161*1c12ee1eSDan Willemsen	out = append(out, '"')
162*1c12ee1eSDan Willemsen	return out
163*1c12ee1eSDan Willemsen}
164*1c12ee1eSDan Willemsen
// indexNeedEscapeInString returns the index of the first byte of s that has
// to be looked at individually before it can be written into a quoted
// string, or len(s) if there is no such byte.
//
// The bytes reported are: anything below ' ', the double quote, the single
// quote, the backslash, and anything at or above 0x7f (DEL and every
// non-ASCII byte).
func indexNeedEscapeInString(s string) int {
	for i := 0; i < len(s); i++ {
		switch c := s[i]; {
		case c < ' ', c >= 0x7f:
			return i
		case c == '"', c == '\'', c == '\\':
			return i
		}
	}
	return len(s)
}
175*1c12ee1eSDan Willemsen
176*1c12ee1eSDan Willemsen// WriteFloat writes out the given float value for given bitSize.
177*1c12ee1eSDan Willemsenfunc (e *Encoder) WriteFloat(n float64, bitSize int) {
178*1c12ee1eSDan Willemsen	e.prepareNext(scalar)
179*1c12ee1eSDan Willemsen	e.out = appendFloat(e.out, n, bitSize)
180*1c12ee1eSDan Willemsen}
181*1c12ee1eSDan Willemsen
// appendFloat appends the text form of n to out and returns the extended
// slice.
//
// NaN is written as "nan" and the infinities as "inf" and "-inf". Every other
// value is formatted by strconv.AppendFloat using the 'g' format with
// precision -1 and the given bitSize.
func appendFloat(out []byte, n float64, bitSize int) []byte {
	if math.IsNaN(n) {
		return append(out, "nan"...)
	}
	if math.IsInf(n, 0) {
		if n < 0 {
			return append(out, "-inf"...)
		}
		return append(out, "inf"...)
	}
	return strconv.AppendFloat(out, n, 'g', -1, bitSize)
}
194*1c12ee1eSDan Willemsen
195*1c12ee1eSDan Willemsen// WriteInt writes out the given signed integer value.
196*1c12ee1eSDan Willemsenfunc (e *Encoder) WriteInt(n int64) {
197*1c12ee1eSDan Willemsen	e.prepareNext(scalar)
198*1c12ee1eSDan Willemsen	e.out = append(e.out, strconv.FormatInt(n, 10)...)
199*1c12ee1eSDan Willemsen}
200*1c12ee1eSDan Willemsen
201*1c12ee1eSDan Willemsen// WriteUint writes out the given unsigned integer value.
202*1c12ee1eSDan Willemsenfunc (e *Encoder) WriteUint(n uint64) {
203*1c12ee1eSDan Willemsen	e.prepareNext(scalar)
204*1c12ee1eSDan Willemsen	e.out = append(e.out, strconv.FormatUint(n, 10)...)
205*1c12ee1eSDan Willemsen}
206*1c12ee1eSDan Willemsen
207*1c12ee1eSDan Willemsen// WriteLiteral writes out the given string as a literal value without quotes.
208*1c12ee1eSDan Willemsen// This is used for writing enum literal strings.
209*1c12ee1eSDan Willemsenfunc (e *Encoder) WriteLiteral(s string) {
210*1c12ee1eSDan Willemsen	e.prepareNext(scalar)
211*1c12ee1eSDan Willemsen	e.out = append(e.out, s...)
212*1c12ee1eSDan Willemsen}
213*1c12ee1eSDan Willemsen
214*1c12ee1eSDan Willemsen// prepareNext adds possible space and indentation for the next value based
215*1c12ee1eSDan Willemsen// on last encType and indent option. It also updates e.lastType to next.
216*1c12ee1eSDan Willemsenfunc (e *Encoder) prepareNext(next encType) {
217*1c12ee1eSDan Willemsen	defer func() {
218*1c12ee1eSDan Willemsen		e.lastType = next
219*1c12ee1eSDan Willemsen	}()
220*1c12ee1eSDan Willemsen
221*1c12ee1eSDan Willemsen	// Single line.
222*1c12ee1eSDan Willemsen	if len(e.indent) == 0 {
223*1c12ee1eSDan Willemsen		// Add space after each field before the next one.
224*1c12ee1eSDan Willemsen		if e.lastType&(scalar|messageClose) != 0 && next == name {
225*1c12ee1eSDan Willemsen			e.out = append(e.out, ' ')
226*1c12ee1eSDan Willemsen			// Add a random extra space to make output unstable.
227*1c12ee1eSDan Willemsen			if detrand.Bool() {
228*1c12ee1eSDan Willemsen				e.out = append(e.out, ' ')
229*1c12ee1eSDan Willemsen			}
230*1c12ee1eSDan Willemsen		}
231*1c12ee1eSDan Willemsen		return
232*1c12ee1eSDan Willemsen	}
233*1c12ee1eSDan Willemsen
234*1c12ee1eSDan Willemsen	// Multi-line.
235*1c12ee1eSDan Willemsen	switch {
236*1c12ee1eSDan Willemsen	case e.lastType == name:
237*1c12ee1eSDan Willemsen		e.out = append(e.out, ' ')
238*1c12ee1eSDan Willemsen		// Add a random extra space after name: to make output unstable.
239*1c12ee1eSDan Willemsen		if detrand.Bool() {
240*1c12ee1eSDan Willemsen			e.out = append(e.out, ' ')
241*1c12ee1eSDan Willemsen		}
242*1c12ee1eSDan Willemsen
243*1c12ee1eSDan Willemsen	case e.lastType == messageOpen && next != messageClose:
244*1c12ee1eSDan Willemsen		e.indents = append(e.indents, e.indent...)
245*1c12ee1eSDan Willemsen		e.out = append(e.out, '\n')
246*1c12ee1eSDan Willemsen		e.out = append(e.out, e.indents...)
247*1c12ee1eSDan Willemsen
248*1c12ee1eSDan Willemsen	case e.lastType&(scalar|messageClose) != 0:
249*1c12ee1eSDan Willemsen		if next == messageClose {
250*1c12ee1eSDan Willemsen			e.indents = e.indents[:len(e.indents)-len(e.indent)]
251*1c12ee1eSDan Willemsen		}
252*1c12ee1eSDan Willemsen		e.out = append(e.out, '\n')
253*1c12ee1eSDan Willemsen		e.out = append(e.out, e.indents...)
254*1c12ee1eSDan Willemsen	}
255*1c12ee1eSDan Willemsen}
256*1c12ee1eSDan Willemsen
// Snapshot returns the current snapshot for use in Reset.
//
// The snapshot is a plain copy of the embedded encoderState struct: the slice
// fields are copied as slice headers, so they still refer to the same backing
// arrays as the Encoder.
func (e *Encoder) Snapshot() encoderState {
	return e.encoderState
}
261*1c12ee1eSDan Willemsen
// Reset resets the Encoder to the given encoderState from a Snapshot.
//
// Only the embedded encoderState is replaced; the indent, delims and
// outputASCII settings are left as they are.
func (e *Encoder) Reset(es encoderState) {
	e.encoderState = es
}
266*1c12ee1eSDan Willemsen
267*1c12ee1eSDan Willemsen// AppendString appends the escaped form of the input string to b.
268*1c12ee1eSDan Willemsenfunc AppendString(b []byte, s string) []byte {
269*1c12ee1eSDan Willemsen	return appendString(b, s, false)
270*1c12ee1eSDan Willemsen}
271