xref: /aosp_15_r20/external/golang-protobuf/internal/encoding/text/decode_number.go (revision 1c12ee1efe575feb122dbf939ff15148a3b3e8f2)
1*1c12ee1eSDan Willemsen// Copyright 2018 The Go Authors. All rights reserved.
2*1c12ee1eSDan Willemsen// Use of this source code is governed by a BSD-style
3*1c12ee1eSDan Willemsen// license that can be found in the LICENSE file.
4*1c12ee1eSDan Willemsen
5*1c12ee1eSDan Willemsenpackage text
6*1c12ee1eSDan Willemsen
7*1c12ee1eSDan Willemsen// parseNumberValue parses a number from the input and returns a Token object.
8*1c12ee1eSDan Willemsenfunc (d *Decoder) parseNumberValue() (Token, bool) {
9*1c12ee1eSDan Willemsen	in := d.in
10*1c12ee1eSDan Willemsen	num := parseNumber(in)
11*1c12ee1eSDan Willemsen	if num.size == 0 {
12*1c12ee1eSDan Willemsen		return Token{}, false
13*1c12ee1eSDan Willemsen	}
14*1c12ee1eSDan Willemsen	numAttrs := num.kind
15*1c12ee1eSDan Willemsen	if num.neg {
16*1c12ee1eSDan Willemsen		numAttrs |= isNegative
17*1c12ee1eSDan Willemsen	}
18*1c12ee1eSDan Willemsen	tok := Token{
19*1c12ee1eSDan Willemsen		kind:     Scalar,
20*1c12ee1eSDan Willemsen		attrs:    numberValue,
21*1c12ee1eSDan Willemsen		pos:      len(d.orig) - len(d.in),
22*1c12ee1eSDan Willemsen		raw:      d.in[:num.size],
23*1c12ee1eSDan Willemsen		str:      num.string(d.in),
24*1c12ee1eSDan Willemsen		numAttrs: numAttrs,
25*1c12ee1eSDan Willemsen	}
26*1c12ee1eSDan Willemsen	d.consume(num.size)
27*1c12ee1eSDan Willemsen	return tok, true
28*1c12ee1eSDan Willemsen}
29*1c12ee1eSDan Willemsen
// Kinds of numeric literal recognised by parseNumber. The value is stored in
// number.kind and copied into the token's numeric attributes by
// parseNumberValue, so the numeric values are spelled out explicitly.
const (
	numDec   uint8 = 0 // decimal integer
	numHex   uint8 = 1 // hexadecimal integer (0x / 0X prefix)
	numOct   uint8 = 2 // octal integer (leading 0)
	numFloat uint8 = 4 // floating point
)
36*1c12ee1eSDan Willemsen
// number is the result of parsing out a valid number from parseNumber. It
// contains data for doing float or integer conversion via the strconv package
// in conjunction with the input bytes.
//
// kind is one of numDec, numHex, numOct or numFloat. neg is set when the
// literal started with a minus sign. size is the total number of input bytes
// the literal spans (including the sign and anything counted in sep); a size
// of 0 means no number was parsed.
type number struct {
	kind uint8
	neg  bool
	size int
	// if neg, this is the length of whitespace and comments between
	// the minus sign and the rest of the number literal
	sep int
}
48*1c12ee1eSDan Willemsen
49*1c12ee1eSDan Willemsenfunc (num number) string(data []byte) string {
50*1c12ee1eSDan Willemsen	strSize := num.size
51*1c12ee1eSDan Willemsen	last := num.size - 1
52*1c12ee1eSDan Willemsen	if num.kind == numFloat && (data[last] == 'f' || data[last] == 'F') {
53*1c12ee1eSDan Willemsen		strSize = last
54*1c12ee1eSDan Willemsen	}
55*1c12ee1eSDan Willemsen	if num.neg && num.sep > 0 {
56*1c12ee1eSDan Willemsen		// strip whitespace/comments between negative sign and the rest
57*1c12ee1eSDan Willemsen		strLen := strSize - num.sep
58*1c12ee1eSDan Willemsen		str := make([]byte, strLen)
59*1c12ee1eSDan Willemsen		str[0] = data[0]
60*1c12ee1eSDan Willemsen		copy(str[1:], data[num.sep+1:strSize])
61*1c12ee1eSDan Willemsen		return string(str)
62*1c12ee1eSDan Willemsen	}
63*1c12ee1eSDan Willemsen	return string(data[:strSize])
64*1c12ee1eSDan Willemsen
65*1c12ee1eSDan Willemsen}
66*1c12ee1eSDan Willemsen
67*1c12ee1eSDan Willemsen// parseNumber constructs a number object from given input. It allows for the
68*1c12ee1eSDan Willemsen// following patterns:
69*1c12ee1eSDan Willemsen//
70*1c12ee1eSDan Willemsen//	integer: ^-?([1-9][0-9]*|0[xX][0-9a-fA-F]+|0[0-7]*)
71*1c12ee1eSDan Willemsen//	float: ^-?((0|[1-9][0-9]*)?([.][0-9]*)?([eE][+-]?[0-9]+)?[fF]?)
72*1c12ee1eSDan Willemsen//
73*1c12ee1eSDan Willemsen// It also returns the number of parsed bytes for the given number, 0 if it is
74*1c12ee1eSDan Willemsen// not a number.
75*1c12ee1eSDan Willemsenfunc parseNumber(input []byte) number {
76*1c12ee1eSDan Willemsen	kind := numDec
77*1c12ee1eSDan Willemsen	var size int
78*1c12ee1eSDan Willemsen	var neg bool
79*1c12ee1eSDan Willemsen
80*1c12ee1eSDan Willemsen	s := input
81*1c12ee1eSDan Willemsen	if len(s) == 0 {
82*1c12ee1eSDan Willemsen		return number{}
83*1c12ee1eSDan Willemsen	}
84*1c12ee1eSDan Willemsen
85*1c12ee1eSDan Willemsen	// Optional -
86*1c12ee1eSDan Willemsen	var sep int
87*1c12ee1eSDan Willemsen	if s[0] == '-' {
88*1c12ee1eSDan Willemsen		neg = true
89*1c12ee1eSDan Willemsen		s = s[1:]
90*1c12ee1eSDan Willemsen		size++
91*1c12ee1eSDan Willemsen		// Consume any whitespace or comments between the
92*1c12ee1eSDan Willemsen		// negative sign and the rest of the number
93*1c12ee1eSDan Willemsen		lenBefore := len(s)
94*1c12ee1eSDan Willemsen		s = consume(s, 0)
95*1c12ee1eSDan Willemsen		sep = lenBefore - len(s)
96*1c12ee1eSDan Willemsen		size += sep
97*1c12ee1eSDan Willemsen		if len(s) == 0 {
98*1c12ee1eSDan Willemsen			return number{}
99*1c12ee1eSDan Willemsen		}
100*1c12ee1eSDan Willemsen	}
101*1c12ee1eSDan Willemsen
102*1c12ee1eSDan Willemsen	switch {
103*1c12ee1eSDan Willemsen	case s[0] == '0':
104*1c12ee1eSDan Willemsen		if len(s) > 1 {
105*1c12ee1eSDan Willemsen			switch {
106*1c12ee1eSDan Willemsen			case s[1] == 'x' || s[1] == 'X':
107*1c12ee1eSDan Willemsen				// Parse as hex number.
108*1c12ee1eSDan Willemsen				kind = numHex
109*1c12ee1eSDan Willemsen				n := 2
110*1c12ee1eSDan Willemsen				s = s[2:]
111*1c12ee1eSDan Willemsen				for len(s) > 0 && (('0' <= s[0] && s[0] <= '9') ||
112*1c12ee1eSDan Willemsen					('a' <= s[0] && s[0] <= 'f') ||
113*1c12ee1eSDan Willemsen					('A' <= s[0] && s[0] <= 'F')) {
114*1c12ee1eSDan Willemsen					s = s[1:]
115*1c12ee1eSDan Willemsen					n++
116*1c12ee1eSDan Willemsen				}
117*1c12ee1eSDan Willemsen				if n == 2 {
118*1c12ee1eSDan Willemsen					return number{}
119*1c12ee1eSDan Willemsen				}
120*1c12ee1eSDan Willemsen				size += n
121*1c12ee1eSDan Willemsen
122*1c12ee1eSDan Willemsen			case '0' <= s[1] && s[1] <= '7':
123*1c12ee1eSDan Willemsen				// Parse as octal number.
124*1c12ee1eSDan Willemsen				kind = numOct
125*1c12ee1eSDan Willemsen				n := 2
126*1c12ee1eSDan Willemsen				s = s[2:]
127*1c12ee1eSDan Willemsen				for len(s) > 0 && '0' <= s[0] && s[0] <= '7' {
128*1c12ee1eSDan Willemsen					s = s[1:]
129*1c12ee1eSDan Willemsen					n++
130*1c12ee1eSDan Willemsen				}
131*1c12ee1eSDan Willemsen				size += n
132*1c12ee1eSDan Willemsen			}
133*1c12ee1eSDan Willemsen
134*1c12ee1eSDan Willemsen			if kind&(numHex|numOct) > 0 {
135*1c12ee1eSDan Willemsen				if len(s) > 0 && !isDelim(s[0]) {
136*1c12ee1eSDan Willemsen					return number{}
137*1c12ee1eSDan Willemsen				}
138*1c12ee1eSDan Willemsen				return number{kind: kind, neg: neg, size: size, sep: sep}
139*1c12ee1eSDan Willemsen			}
140*1c12ee1eSDan Willemsen		}
141*1c12ee1eSDan Willemsen		s = s[1:]
142*1c12ee1eSDan Willemsen		size++
143*1c12ee1eSDan Willemsen
144*1c12ee1eSDan Willemsen	case '1' <= s[0] && s[0] <= '9':
145*1c12ee1eSDan Willemsen		n := 1
146*1c12ee1eSDan Willemsen		s = s[1:]
147*1c12ee1eSDan Willemsen		for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
148*1c12ee1eSDan Willemsen			s = s[1:]
149*1c12ee1eSDan Willemsen			n++
150*1c12ee1eSDan Willemsen		}
151*1c12ee1eSDan Willemsen		size += n
152*1c12ee1eSDan Willemsen
153*1c12ee1eSDan Willemsen	case s[0] == '.':
154*1c12ee1eSDan Willemsen		// Set kind to numFloat to signify the intent to parse as float. And
155*1c12ee1eSDan Willemsen		// that it needs to have other digits after '.'.
156*1c12ee1eSDan Willemsen		kind = numFloat
157*1c12ee1eSDan Willemsen
158*1c12ee1eSDan Willemsen	default:
159*1c12ee1eSDan Willemsen		return number{}
160*1c12ee1eSDan Willemsen	}
161*1c12ee1eSDan Willemsen
162*1c12ee1eSDan Willemsen	// . followed by 0 or more digits.
163*1c12ee1eSDan Willemsen	if len(s) > 0 && s[0] == '.' {
164*1c12ee1eSDan Willemsen		n := 1
165*1c12ee1eSDan Willemsen		s = s[1:]
166*1c12ee1eSDan Willemsen		// If decimal point was before any digits, it should be followed by
167*1c12ee1eSDan Willemsen		// other digits.
168*1c12ee1eSDan Willemsen		if len(s) == 0 && kind == numFloat {
169*1c12ee1eSDan Willemsen			return number{}
170*1c12ee1eSDan Willemsen		}
171*1c12ee1eSDan Willemsen		for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
172*1c12ee1eSDan Willemsen			s = s[1:]
173*1c12ee1eSDan Willemsen			n++
174*1c12ee1eSDan Willemsen		}
175*1c12ee1eSDan Willemsen		size += n
176*1c12ee1eSDan Willemsen		kind = numFloat
177*1c12ee1eSDan Willemsen	}
178*1c12ee1eSDan Willemsen
179*1c12ee1eSDan Willemsen	// e or E followed by an optional - or + and 1 or more digits.
180*1c12ee1eSDan Willemsen	if len(s) >= 2 && (s[0] == 'e' || s[0] == 'E') {
181*1c12ee1eSDan Willemsen		kind = numFloat
182*1c12ee1eSDan Willemsen		s = s[1:]
183*1c12ee1eSDan Willemsen		n := 1
184*1c12ee1eSDan Willemsen		if s[0] == '+' || s[0] == '-' {
185*1c12ee1eSDan Willemsen			s = s[1:]
186*1c12ee1eSDan Willemsen			n++
187*1c12ee1eSDan Willemsen			if len(s) == 0 {
188*1c12ee1eSDan Willemsen				return number{}
189*1c12ee1eSDan Willemsen			}
190*1c12ee1eSDan Willemsen		}
191*1c12ee1eSDan Willemsen		for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
192*1c12ee1eSDan Willemsen			s = s[1:]
193*1c12ee1eSDan Willemsen			n++
194*1c12ee1eSDan Willemsen		}
195*1c12ee1eSDan Willemsen		size += n
196*1c12ee1eSDan Willemsen	}
197*1c12ee1eSDan Willemsen
198*1c12ee1eSDan Willemsen	// Optional suffix f or F for floats.
199*1c12ee1eSDan Willemsen	if len(s) > 0 && (s[0] == 'f' || s[0] == 'F') {
200*1c12ee1eSDan Willemsen		kind = numFloat
201*1c12ee1eSDan Willemsen		s = s[1:]
202*1c12ee1eSDan Willemsen		size++
203*1c12ee1eSDan Willemsen	}
204*1c12ee1eSDan Willemsen
205*1c12ee1eSDan Willemsen	// Check that next byte is a delimiter or it is at the end.
206*1c12ee1eSDan Willemsen	if len(s) > 0 && !isDelim(s[0]) {
207*1c12ee1eSDan Willemsen		return number{}
208*1c12ee1eSDan Willemsen	}
209*1c12ee1eSDan Willemsen
210*1c12ee1eSDan Willemsen	return number{kind: kind, neg: neg, size: size, sep: sep}
211*1c12ee1eSDan Willemsen}
212