xref: /aosp_15_r20/external/golang-protobuf/internal/encoding/text/decode_token.go (revision 1c12ee1efe575feb122dbf939ff15148a3b3e8f2)
1// Copyright 2018 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package text
6
7import (
8	"bytes"
9	"fmt"
10	"math"
11	"strconv"
12	"strings"
13
14	"google.golang.org/protobuf/internal/flags"
15)
16
// Kind represents a token kind expressible in the textproto format.
type Kind uint8

// Kind values. The numbering starts at zero, so the zero value of Kind is
// Invalid.
const (
	Invalid Kind = iota
	EOF
	Name   // Name indicates the field name.
	Scalar // Scalar indicates a scalar value, e.g. "string", 47, ENUM_LITERAL, true.
	MessageOpen
	MessageClose
	ListOpen
	ListClose

	// comma and semicolon are only for parsing in between values and should
	// not be exposed.
	comma
	semicolon

	// bof indicates beginning of file, which is the default token
	// kind at the beginning of parsing. It is an alias for Invalid, so the
	// two share the same numeric value.
	bof = Invalid
)
39
40func (t Kind) String() string {
41	switch t {
42	case Invalid:
43		return "<invalid>"
44	case EOF:
45		return "eof"
46	case Scalar:
47		return "scalar"
48	case Name:
49		return "name"
50	case MessageOpen:
51		return "{"
52	case MessageClose:
53		return "}"
54	case ListOpen:
55		return "["
56	case ListClose:
57		return "]"
58	case comma:
59		return ","
60	case semicolon:
61		return ";"
62	default:
63		return fmt.Sprintf("<invalid:%v>", uint8(t))
64	}
65}
66
// NameKind represents different types of field names.
type NameKind uint8

// NameKind values. The numbering starts at 1, so the zero value of NameKind
// is not one of the declared kinds. These are consecutive ordinals
// (1, 2, 3), not independent bit flags.
const (
	IdentName NameKind = iota + 1
	TypeName
	FieldNumber
)
76
77func (t NameKind) String() string {
78	switch t {
79	case IdentName:
80		return "IdentName"
81	case TypeName:
82		return "TypeName"
83	case FieldNumber:
84		return "FieldNumber"
85	default:
86		return fmt.Sprintf("<invalid:%v>", uint8(t))
87	}
88}
89
// hasSeparator is the bit mask in Token.attrs that indicates a Name token is
// followed by the separator char ':'. The field name separator char is
// optional for a message field or repeated message field, but required for all
// other types. The decoder simply records whether a Name token is followed by
// the separator or not; it is up to the prototext package to validate.
const hasSeparator = 1 << 7
96
// Scalar value types. One of these is stored in Token.attrs when the token
// kind is Scalar. The numbering starts at 1, so a zero attrs value matches
// none of them.
const (
	numberValue = iota + 1
	stringValue
	literalValue
)
103
// isNegative is the bit mask in Token.numAttrs that indicates the number is
// negative. It occupies the highest bit of the uint8.
const isNegative = 1 << 7
106
// Token provides a parsed token kind and value. Values are provided by the
// different accessor methods, which check the kind and attributes before
// returning anything.
type Token struct {
	// kind is the Kind of the Token object.
	kind Kind
	// attrs contains metadata for the following Kinds:
	// Name: hasSeparator bit and one of NameKind.
	// Scalar: one of numberValue, stringValue, literalValue.
	attrs uint8
	// numAttrs contains metadata for numberValue:
	// - highest bit (isNegative) is whether negative or positive.
	// - lower bits indicate one of numDec, numHex, numOct, numFloat.
	numAttrs uint8
	// pos provides the position of the token in the original input.
	pos int
	// raw holds the raw bytes of the serialized token.
	// This is a subslice into the original input.
	raw []byte
	// str contains the parsed string for the following:
	// - stringValue of Scalar kind
	// - numberValue of Scalar kind
	// - TypeName of Name kind
	str string
}
131
132// Kind returns the token kind.
133func (t Token) Kind() Kind {
134	return t.kind
135}
136
137// RawString returns the read value in string.
138func (t Token) RawString() string {
139	return string(t.raw)
140}
141
142// Pos returns the token position from the input.
143func (t Token) Pos() int {
144	return t.pos
145}
146
147// NameKind returns IdentName, TypeName or FieldNumber.
148// It panics if type is not Name.
149func (t Token) NameKind() NameKind {
150	if t.kind == Name {
151		return NameKind(t.attrs &^ hasSeparator)
152	}
153	panic(fmt.Sprintf("Token is not a Name type: %s", t.kind))
154}
155
156// HasSeparator returns true if the field name is followed by the separator char
157// ':', else false. It panics if type is not Name.
158func (t Token) HasSeparator() bool {
159	if t.kind == Name {
160		return t.attrs&hasSeparator != 0
161	}
162	panic(fmt.Sprintf("Token is not a Name type: %s", t.kind))
163}
164
165// IdentName returns the value for IdentName type.
166func (t Token) IdentName() string {
167	if t.kind == Name && t.attrs&uint8(IdentName) != 0 {
168		return string(t.raw)
169	}
170	panic(fmt.Sprintf("Token is not an IdentName: %s:%s", t.kind, NameKind(t.attrs&^hasSeparator)))
171}
172
173// TypeName returns the value for TypeName type.
174func (t Token) TypeName() string {
175	if t.kind == Name && t.attrs&uint8(TypeName) != 0 {
176		return t.str
177	}
178	panic(fmt.Sprintf("Token is not a TypeName: %s:%s", t.kind, NameKind(t.attrs&^hasSeparator)))
179}
180
181// FieldNumber returns the value for FieldNumber type. It returns a
182// non-negative int32 value. Caller will still need to validate for the correct
183// field number range.
184func (t Token) FieldNumber() int32 {
185	if t.kind != Name || t.attrs&uint8(FieldNumber) == 0 {
186		panic(fmt.Sprintf("Token is not a FieldNumber: %s:%s", t.kind, NameKind(t.attrs&^hasSeparator)))
187	}
188	// Following should not return an error as it had already been called right
189	// before this Token was constructed.
190	num, _ := strconv.ParseInt(string(t.raw), 10, 32)
191	return int32(num)
192}
193
194// String returns the string value for a Scalar type.
195func (t Token) String() (string, bool) {
196	if t.kind != Scalar || t.attrs != stringValue {
197		return "", false
198	}
199	return t.str, true
200}
201
202// Enum returns the literal value for a Scalar type for use as enum literals.
203func (t Token) Enum() (string, bool) {
204	if t.kind != Scalar || t.attrs != literalValue || (len(t.raw) > 0 && t.raw[0] == '-') {
205		return "", false
206	}
207	return string(t.raw), true
208}
209
210// Bool returns the bool value for a Scalar type.
211func (t Token) Bool() (bool, bool) {
212	if t.kind != Scalar {
213		return false, false
214	}
215	switch t.attrs {
216	case literalValue:
217		if b, ok := boolLits[string(t.raw)]; ok {
218			return b, true
219		}
220	case numberValue:
221		// Unsigned integer representation of 0 or 1 is permitted: 00, 0x0, 01,
222		// 0x1, etc.
223		n, err := strconv.ParseUint(t.str, 0, 64)
224		if err == nil {
225			switch n {
226			case 0:
227				return false, true
228			case 1:
229				return true, true
230			}
231		}
232	}
233	return false, false
234}
235
// boolLits maps the boolean literal spellings accepted by Token.Bool to their
// values. These exact literals are the ones supported in C++; the lookup is
// case-sensitive, so other capitalizations are not accepted.
var boolLits = map[string]bool{
	"t":     true,
	"true":  true,
	"True":  true,
	"f":     false,
	"false": false,
	"False": false,
}
245
246// Uint64 returns the uint64 value for a Scalar type.
247func (t Token) Uint64() (uint64, bool) {
248	if t.kind != Scalar || t.attrs != numberValue ||
249		t.numAttrs&isNegative > 0 || t.numAttrs&numFloat > 0 {
250		return 0, false
251	}
252	n, err := strconv.ParseUint(t.str, 0, 64)
253	if err != nil {
254		return 0, false
255	}
256	return n, true
257}
258
259// Uint32 returns the uint32 value for a Scalar type.
260func (t Token) Uint32() (uint32, bool) {
261	if t.kind != Scalar || t.attrs != numberValue ||
262		t.numAttrs&isNegative > 0 || t.numAttrs&numFloat > 0 {
263		return 0, false
264	}
265	n, err := strconv.ParseUint(t.str, 0, 32)
266	if err != nil {
267		return 0, false
268	}
269	return uint32(n), true
270}
271
272// Int64 returns the int64 value for a Scalar type.
273func (t Token) Int64() (int64, bool) {
274	if t.kind != Scalar || t.attrs != numberValue || t.numAttrs&numFloat > 0 {
275		return 0, false
276	}
277	if n, err := strconv.ParseInt(t.str, 0, 64); err == nil {
278		return n, true
279	}
280	// C++ accepts large positive hex numbers as negative values.
281	// This feature is here for proto1 backwards compatibility purposes.
282	if flags.ProtoLegacy && (t.numAttrs == numHex) {
283		if n, err := strconv.ParseUint(t.str, 0, 64); err == nil {
284			return int64(n), true
285		}
286	}
287	return 0, false
288}
289
290// Int32 returns the int32 value for a Scalar type.
291func (t Token) Int32() (int32, bool) {
292	if t.kind != Scalar || t.attrs != numberValue || t.numAttrs&numFloat > 0 {
293		return 0, false
294	}
295	if n, err := strconv.ParseInt(t.str, 0, 32); err == nil {
296		return int32(n), true
297	}
298	// C++ accepts large positive hex numbers as negative values.
299	// This feature is here for proto1 backwards compatibility purposes.
300	if flags.ProtoLegacy && (t.numAttrs == numHex) {
301		if n, err := strconv.ParseUint(t.str, 0, 32); err == nil {
302			return int32(n), true
303		}
304	}
305	return 0, false
306}
307
308// Float64 returns the float64 value for a Scalar type.
309func (t Token) Float64() (float64, bool) {
310	if t.kind != Scalar {
311		return 0, false
312	}
313	switch t.attrs {
314	case literalValue:
315		if f, ok := floatLits[strings.ToLower(string(t.raw))]; ok {
316			return f, true
317		}
318	case numberValue:
319		n, err := strconv.ParseFloat(t.str, 64)
320		if err == nil {
321			return n, true
322		}
323		nerr := err.(*strconv.NumError)
324		if nerr.Err == strconv.ErrRange {
325			return n, true
326		}
327	}
328	return 0, false
329}
330
331// Float32 returns the float32 value for a Scalar type.
332func (t Token) Float32() (float32, bool) {
333	if t.kind != Scalar {
334		return 0, false
335	}
336	switch t.attrs {
337	case literalValue:
338		if f, ok := floatLits[strings.ToLower(string(t.raw))]; ok {
339			return float32(f), true
340		}
341	case numberValue:
342		n, err := strconv.ParseFloat(t.str, 64)
343		if err == nil {
344			// Overflows are treated as (-)infinity.
345			return float32(n), true
346		}
347		nerr := err.(*strconv.NumError)
348		if nerr.Err == strconv.ErrRange {
349			return float32(n), true
350		}
351	}
352	return 0, false
353}
354
// floatLits maps the supported float literals, in lowercase, to their values.
// C++ permits case-insensitive variants of these literals, so Float64 and
// Float32 lowercase the raw token text before looking it up here.
var floatLits = map[string]float64{
	"nan":       math.NaN(),
	"inf":       math.Inf(1),
	"infinity":  math.Inf(1),
	"-inf":      math.Inf(-1),
	"-infinity": math.Inf(-1),
}
364
365// TokenEquals returns true if given Tokens are equal, else false.
366func TokenEquals(x, y Token) bool {
367	return x.kind == y.kind &&
368		x.attrs == y.attrs &&
369		x.numAttrs == y.numAttrs &&
370		x.pos == y.pos &&
371		bytes.Equal(x.raw, y.raw) &&
372		x.str == y.str
373}
374