1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package strconv
6
7// decimal to binary floating point conversion.
8// Algorithm:
9//   1) Store input in multiprecision decimal.
10//   2) Multiply/divide decimal by powers of two until in range [0.5, 1)
11//   3) Multiply by 2^precision and round to get mantissa.
12
13import "math"
14
// optimize gates the fast conversion paths that atof32 and atof64 try
// before falling back to the slow decimal-based conversion.
var optimize = true // set to false to force slow-path conversions for testing
16
// commonPrefixLenIgnoreCase reports how many leading bytes of s match
// prefix when the ASCII upper-case letters in s are folded to lower case.
// Only s is folded, so prefix must already be all lower-case.
func commonPrefixLenIgnoreCase(s, prefix string) int {
	// Never look past the end of the shorter string.
	limit := len(s)
	if len(prefix) < limit {
		limit = len(prefix)
	}

	matched := 0
	for matched < limit {
		ch := s[matched]
		if ch >= 'A' && ch <= 'Z' {
			ch |= 'a' - 'A' // set the ASCII lower-case bit
		}
		if ch != prefix[matched] {
			break
		}
		matched++
	}
	return matched
}
36
37// special returns the floating-point value for the special,
38// possibly signed floating-point representations inf, infinity,
39// and NaN. The result is ok if a prefix of s contains one
40// of these representations and n is the length of that prefix.
41// The character case is ignored.
42func special(s string) (f float64, n int, ok bool) {
43	if len(s) == 0 {
44		return 0, 0, false
45	}
46	sign := 1
47	nsign := 0
48	switch s[0] {
49	case '+', '-':
50		if s[0] == '-' {
51			sign = -1
52		}
53		nsign = 1
54		s = s[1:]
55		fallthrough
56	case 'i', 'I':
57		n := commonPrefixLenIgnoreCase(s, "infinity")
58		// Anything longer than "inf" is ok, but if we
59		// don't have "infinity", only consume "inf".
60		if 3 < n && n < 8 {
61			n = 3
62		}
63		if n == 3 || n == 8 {
64			return math.Inf(sign), nsign + n, true
65		}
66	case 'n', 'N':
67		if commonPrefixLenIgnoreCase(s, "nan") == 3 {
68			return math.NaN(), 3, true
69		}
70	}
71	return 0, 0, false
72}
73
74func (b *decimal) set(s string) (ok bool) {
75	i := 0
76	b.neg = false
77	b.trunc = false
78
79	// optional sign
80	if i >= len(s) {
81		return
82	}
83	switch {
84	case s[i] == '+':
85		i++
86	case s[i] == '-':
87		b.neg = true
88		i++
89	}
90
91	// digits
92	sawdot := false
93	sawdigits := false
94	for ; i < len(s); i++ {
95		switch {
96		case s[i] == '_':
97			// readFloat already checked underscores
98			continue
99		case s[i] == '.':
100			if sawdot {
101				return
102			}
103			sawdot = true
104			b.dp = b.nd
105			continue
106
107		case '0' <= s[i] && s[i] <= '9':
108			sawdigits = true
109			if s[i] == '0' && b.nd == 0 { // ignore leading zeros
110				b.dp--
111				continue
112			}
113			if b.nd < len(b.d) {
114				b.d[b.nd] = s[i]
115				b.nd++
116			} else if s[i] != '0' {
117				b.trunc = true
118			}
119			continue
120		}
121		break
122	}
123	if !sawdigits {
124		return
125	}
126	if !sawdot {
127		b.dp = b.nd
128	}
129
130	// optional exponent moves decimal point.
131	// if we read a very large, very long number,
132	// just be sure to move the decimal point by
133	// a lot (say, 100000).  it doesn't matter if it's
134	// not the exact number.
135	if i < len(s) && lower(s[i]) == 'e' {
136		i++
137		if i >= len(s) {
138			return
139		}
140		esign := 1
141		if s[i] == '+' {
142			i++
143		} else if s[i] == '-' {
144			i++
145			esign = -1
146		}
147		if i >= len(s) || s[i] < '0' || s[i] > '9' {
148			return
149		}
150		e := 0
151		for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ {
152			if s[i] == '_' {
153				// readFloat already checked underscores
154				continue
155			}
156			if e < 10000 {
157				e = e*10 + int(s[i]) - '0'
158			}
159		}
160		b.dp += e * esign
161	}
162
163	if i != len(s) {
164		return
165	}
166
167	ok = true
168	return
169}
170
171// readFloat reads a decimal or hexadecimal mantissa and exponent from a float
172// string representation in s; the number may be followed by other characters.
173// readFloat reports the number of bytes consumed (i), and whether the number
174// is valid (ok).
175func readFloat(s string) (mantissa uint64, exp int, neg, trunc, hex bool, i int, ok bool) {
176	underscores := false
177
178	// optional sign
179	if i >= len(s) {
180		return
181	}
182	switch {
183	case s[i] == '+':
184		i++
185	case s[i] == '-':
186		neg = true
187		i++
188	}
189
190	// digits
191	base := uint64(10)
192	maxMantDigits := 19 // 10^19 fits in uint64
193	expChar := byte('e')
194	if i+2 < len(s) && s[i] == '0' && lower(s[i+1]) == 'x' {
195		base = 16
196		maxMantDigits = 16 // 16^16 fits in uint64
197		i += 2
198		expChar = 'p'
199		hex = true
200	}
201	sawdot := false
202	sawdigits := false
203	nd := 0
204	ndMant := 0
205	dp := 0
206loop:
207	for ; i < len(s); i++ {
208		switch c := s[i]; true {
209		case c == '_':
210			underscores = true
211			continue
212
213		case c == '.':
214			if sawdot {
215				break loop
216			}
217			sawdot = true
218			dp = nd
219			continue
220
221		case '0' <= c && c <= '9':
222			sawdigits = true
223			if c == '0' && nd == 0 { // ignore leading zeros
224				dp--
225				continue
226			}
227			nd++
228			if ndMant < maxMantDigits {
229				mantissa *= base
230				mantissa += uint64(c - '0')
231				ndMant++
232			} else if c != '0' {
233				trunc = true
234			}
235			continue
236
237		case base == 16 && 'a' <= lower(c) && lower(c) <= 'f':
238			sawdigits = true
239			nd++
240			if ndMant < maxMantDigits {
241				mantissa *= 16
242				mantissa += uint64(lower(c) - 'a' + 10)
243				ndMant++
244			} else {
245				trunc = true
246			}
247			continue
248		}
249		break
250	}
251	if !sawdigits {
252		return
253	}
254	if !sawdot {
255		dp = nd
256	}
257
258	if base == 16 {
259		dp *= 4
260		ndMant *= 4
261	}
262
263	// optional exponent moves decimal point.
264	// if we read a very large, very long number,
265	// just be sure to move the decimal point by
266	// a lot (say, 100000).  it doesn't matter if it's
267	// not the exact number.
268	if i < len(s) && lower(s[i]) == expChar {
269		i++
270		if i >= len(s) {
271			return
272		}
273		esign := 1
274		if s[i] == '+' {
275			i++
276		} else if s[i] == '-' {
277			i++
278			esign = -1
279		}
280		if i >= len(s) || s[i] < '0' || s[i] > '9' {
281			return
282		}
283		e := 0
284		for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ {
285			if s[i] == '_' {
286				underscores = true
287				continue
288			}
289			if e < 10000 {
290				e = e*10 + int(s[i]) - '0'
291			}
292		}
293		dp += e * esign
294	} else if base == 16 {
295		// Must have exponent.
296		return
297	}
298
299	if mantissa != 0 {
300		exp = dp - ndMant
301	}
302
303	if underscores && !underscoreOK(s[:i]) {
304		return
305	}
306
307	ok = true
308	return
309}
310
// powtab maps a decimal power of ten to a binary power of two.
// floatBits indexes it by the magnitude of the decimal exponent to pick
// how many bits to shift by in one step. For i >= 1, powtab[i] is the
// largest k with 2^k <= 10^i; entry 0 is 1 so that a value whose decimal
// point is already at position 0 can still be shifted one bit at a time.
var powtab = []int{1, 3, 6, 9, 13, 16, 19, 23, 26}
313
// floatBits converts the decimal d to the bit pattern of the binary
// floating-point format described by flt. It returns the assembled sign,
// exponent, and mantissa bits in b, and sets overflow when the value is
// too large for the format, in which case b encodes ±Inf.
// The conversion calls d.Shift repeatedly, so d presumably no longer holds
// its original value afterwards.
func (d *decimal) floatBits(flt *floatInfo) (b uint64, overflow bool) {
	var exp int
	var mant uint64

	// Zero is always a special case.
	if d.nd == 0 {
		mant = 0
		exp = flt.bias
		goto out
	}

	// Obvious overflow/underflow.
	// These bounds are for 64-bit floats.
	// Will have to change if we want to support 80-bit floats in the future.
	if d.dp > 310 {
		goto overflow
	}
	if d.dp < -330 {
		// zero
		mant = 0
		exp = flt.bias
		goto out
	}

	// Scale by powers of two until in range [0.5, 1.0)
	// The first loop shrinks values that still have digits left of the
	// decimal point; exp accumulates the total binary shift applied.
	exp = 0
	for d.dp > 0 {
		var n int
		if d.dp >= len(powtab) {
			// Beyond the table: use a fixed step of 27 bits.
			n = 27
		} else {
			n = powtab[d.dp]
		}
		d.Shift(-n)
		exp += n
	}
	// The second loop grows values that are below 0.5.
	for d.dp < 0 || d.dp == 0 && d.d[0] < '5' {
		var n int
		if -d.dp >= len(powtab) {
			n = 27
		} else {
			n = powtab[-d.dp]
		}
		d.Shift(n)
		exp -= n
	}

	// Our range is [0.5,1) but floating point range is [1,2).
	exp--

	// Minimum representable exponent is flt.bias+1.
	// If the exponent is smaller, move it up and
	// adjust d accordingly.
	if exp < flt.bias+1 {
		n := flt.bias + 1 - exp
		d.Shift(-n)
		exp += n
	}

	// The all-ones exponent field is what the overflow path below uses
	// for ±Inf, so a finite result must stay below it.
	if exp-flt.bias >= 1<<flt.expbits-1 {
		goto overflow
	}

	// Extract 1+flt.mantbits bits.
	d.Shift(int(1 + flt.mantbits))
	mant = d.RoundedInteger()

	// Rounding might have added a bit; shift down.
	if mant == 2<<flt.mantbits {
		mant >>= 1
		exp++
		// The bumped exponent may itself have reached the overflow range.
		if exp-flt.bias >= 1<<flt.expbits-1 {
			goto overflow
		}
	}

	// Denormalized?
	// Without the leading 1 bit the value is stored with the smallest
	// exponent field (zero after the bias is subtracted below).
	if mant&(1<<flt.mantbits) == 0 {
		exp = flt.bias
	}
	goto out

overflow:
	// ±Inf
	mant = 0
	exp = 1<<flt.expbits - 1 + flt.bias
	overflow = true

out:
	// Assemble bits.
	// Mantissa field: drop the leading 1 bit by masking to mantbits bits.
	bits := mant & (uint64(1)<<flt.mantbits - 1)
	// Exponent field: biased exponent, placed just above the mantissa.
	bits |= uint64((exp-flt.bias)&(1<<flt.expbits-1)) << flt.mantbits
	// Sign bit: the topmost bit of the format.
	if d.neg {
		bits |= 1 << flt.mantbits << flt.expbits
	}
	return bits, overflow
}
411
// Exact powers of 10, indexed by exponent: float64pow10[k] and
// float32pow10[k] both hold 10^k.
var (
	float64pow10 = []float64{
		1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7,
		1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15,
		1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22,
	}

	float32pow10 = []float32{
		1e0, 1e1, 1e2, 1e3, 1e4, 1e5,
		1e6, 1e7, 1e8, 1e9, 1e10,
	}
)
419
420// If possible to convert decimal representation to 64-bit float f exactly,
421// entirely in floating-point math, do so, avoiding the expense of decimalToFloatBits.
422// Three common cases:
423//
424//	value is exact integer
425//	value is exact integer * exact power of ten
426//	value is exact integer / exact power of ten
427//
428// These all produce potentially inexact but correctly rounded answers.
429func atof64exact(mantissa uint64, exp int, neg bool) (f float64, ok bool) {
430	if mantissa>>float64info.mantbits != 0 {
431		return
432	}
433	f = float64(mantissa)
434	if neg {
435		f = -f
436	}
437	switch {
438	case exp == 0:
439		// an integer.
440		return f, true
441	// Exact integers are <= 10^15.
442	// Exact powers of ten are <= 10^22.
443	case exp > 0 && exp <= 15+22: // int * 10^k
444		// If exponent is big but number of digits is not,
445		// can move a few zeros into the integer part.
446		if exp > 22 {
447			f *= float64pow10[exp-22]
448			exp = 22
449		}
450		if f > 1e15 || f < -1e15 {
451			// the exponent was really too large.
452			return
453		}
454		return f * float64pow10[exp], true
455	case exp < 0 && exp >= -22: // int / 10^k
456		return f / float64pow10[-exp], true
457	}
458	return
459}
460
461// If possible to compute mantissa*10^exp to 32-bit float f exactly,
462// entirely in floating-point math, do so, avoiding the machinery above.
463func atof32exact(mantissa uint64, exp int, neg bool) (f float32, ok bool) {
464	if mantissa>>float32info.mantbits != 0 {
465		return
466	}
467	f = float32(mantissa)
468	if neg {
469		f = -f
470	}
471	switch {
472	case exp == 0:
473		return f, true
474	// Exact integers are <= 10^7.
475	// Exact powers of ten are <= 10^10.
476	case exp > 0 && exp <= 7+10: // int * 10^k
477		// If exponent is big but number of digits is not,
478		// can move a few zeros into the integer part.
479		if exp > 10 {
480			f *= float32pow10[exp-10]
481			exp = 10
482		}
483		if f > 1e7 || f < -1e7 {
484			// the exponent was really too large.
485			return
486		}
487		return f * float32pow10[exp], true
488	case exp < 0 && exp >= -10: // int / 10^k
489		return f / float32pow10[-exp], true
490	}
491	return
492}
493
// atofHex converts the hex floating-point string s
// to a rounded float32 or float64 value (depending on flt==&float32info or flt==&float64info)
// and returns it as a float64.
// The string s has already been parsed into a mantissa, exponent, and sign (neg==true for negative).
// If trunc is true, trailing non-zero bits have been omitted from the mantissa.
// s itself is used only to build the range error that is returned,
// together with an infinity, when the value is too large for the format.
func atofHex(s string, flt *floatInfo, mantissa uint64, exp int, neg, trunc bool) (float64, error) {
	// maxExp is the largest exponent accepted below before the result
	// is turned into an infinity; minExp is the smallest exponent used
	// before the mantissa is denormalized.
	maxExp := 1<<flt.expbits + flt.bias - 2
	minExp := flt.bias + 1
	exp += int(flt.mantbits) // mantissa now implicitly divided by 2^mantbits.

	// Shift mantissa and exponent to bring representation into float range.
	// Eventually we want a mantissa with a leading 1-bit followed by mantbits other bits.
	// For rounding, we need two more, where the bottom bit represents
	// whether that bit or any later bit was non-zero.
	// (If the mantissa has already lost non-zero bits, trunc is true,
	// and we OR in a 1 below after shifting left appropriately.)
	for mantissa != 0 && mantissa>>(flt.mantbits+2) == 0 {
		mantissa <<= 1
		exp--
	}
	if trunc {
		mantissa |= 1
	}
	// Too wide: shift right, folding every bit shifted out into the
	// bottom bit so that lost non-zero bits are remembered.
	for mantissa>>(1+flt.mantbits+2) != 0 {
		mantissa = mantissa>>1 | mantissa&1
		exp++
	}

	// If exponent is too negative,
	// denormalize in hopes of making it representable.
	// (The -2 is for the rounding bits.)
	for mantissa > 1 && exp < minExp-2 {
		mantissa = mantissa>>1 | mantissa&1
		exp++
	}

	// Round using two bottom bits.
	round := mantissa & 3
	mantissa >>= 2
	round |= mantissa & 1 // round to even (round up if mantissa is odd)
	exp += 2
	if round == 3 {
		mantissa++
		// Rounding up may carry into one extra bit; renormalize.
		if mantissa == 1<<(1+flt.mantbits) {
			mantissa >>= 1
			exp++
		}
	}

	if mantissa>>flt.mantbits == 0 { // Denormal or zero.
		exp = flt.bias
	}
	var err error
	if exp > maxExp { // infinity and range error
		mantissa = 1 << flt.mantbits
		exp = maxExp + 1
		err = rangeError(fnParseFloat, s)
	}

	// Assemble the mantissa field (leading 1 bit masked off), the
	// biased exponent field above it, and the sign bit on top.
	bits := mantissa & (1<<flt.mantbits - 1)
	bits |= uint64((exp-flt.bias)&(1<<flt.expbits-1)) << flt.mantbits
	if neg {
		bits |= 1 << flt.mantbits << flt.expbits
	}
	if flt == &float32info {
		// The bits form a 32-bit pattern; widen the value to float64.
		return float64(math.Float32frombits(uint32(bits))), err
	}
	return math.Float64frombits(bits), err
}
563
// fnParseFloat is the function name passed to syntaxError and rangeError
// when the float-parsing routines in this file build their errors.
const fnParseFloat = "ParseFloat"
565
566func atof32(s string) (f float32, n int, err error) {
567	if val, n, ok := special(s); ok {
568		return float32(val), n, nil
569	}
570
571	mantissa, exp, neg, trunc, hex, n, ok := readFloat(s)
572	if !ok {
573		return 0, n, syntaxError(fnParseFloat, s)
574	}
575
576	if hex {
577		f, err := atofHex(s[:n], &float32info, mantissa, exp, neg, trunc)
578		return float32(f), n, err
579	}
580
581	if optimize {
582		// Try pure floating-point arithmetic conversion, and if that fails,
583		// the Eisel-Lemire algorithm.
584		if !trunc {
585			if f, ok := atof32exact(mantissa, exp, neg); ok {
586				return f, n, nil
587			}
588		}
589		f, ok := eiselLemire32(mantissa, exp, neg)
590		if ok {
591			if !trunc {
592				return f, n, nil
593			}
594			// Even if the mantissa was truncated, we may
595			// have found the correct result. Confirm by
596			// converting the upper mantissa bound.
597			fUp, ok := eiselLemire32(mantissa+1, exp, neg)
598			if ok && f == fUp {
599				return f, n, nil
600			}
601		}
602	}
603
604	// Slow fallback.
605	var d decimal
606	if !d.set(s[:n]) {
607		return 0, n, syntaxError(fnParseFloat, s)
608	}
609	b, ovf := d.floatBits(&float32info)
610	f = math.Float32frombits(uint32(b))
611	if ovf {
612		err = rangeError(fnParseFloat, s)
613	}
614	return f, n, err
615}
616
617func atof64(s string) (f float64, n int, err error) {
618	if val, n, ok := special(s); ok {
619		return val, n, nil
620	}
621
622	mantissa, exp, neg, trunc, hex, n, ok := readFloat(s)
623	if !ok {
624		return 0, n, syntaxError(fnParseFloat, s)
625	}
626
627	if hex {
628		f, err := atofHex(s[:n], &float64info, mantissa, exp, neg, trunc)
629		return f, n, err
630	}
631
632	if optimize {
633		// Try pure floating-point arithmetic conversion, and if that fails,
634		// the Eisel-Lemire algorithm.
635		if !trunc {
636			if f, ok := atof64exact(mantissa, exp, neg); ok {
637				return f, n, nil
638			}
639		}
640		f, ok := eiselLemire64(mantissa, exp, neg)
641		if ok {
642			if !trunc {
643				return f, n, nil
644			}
645			// Even if the mantissa was truncated, we may
646			// have found the correct result. Confirm by
647			// converting the upper mantissa bound.
648			fUp, ok := eiselLemire64(mantissa+1, exp, neg)
649			if ok && f == fUp {
650				return f, n, nil
651			}
652		}
653	}
654
655	// Slow fallback.
656	var d decimal
657	if !d.set(s[:n]) {
658		return 0, n, syntaxError(fnParseFloat, s)
659	}
660	b, ovf := d.floatBits(&float64info)
661	f = math.Float64frombits(b)
662	if ovf {
663		err = rangeError(fnParseFloat, s)
664	}
665	return f, n, err
666}
667
668// ParseFloat converts the string s to a floating-point number
669// with the precision specified by bitSize: 32 for float32, or 64 for float64.
670// When bitSize=32, the result still has type float64, but it will be
671// convertible to float32 without changing its value.
672//
673// ParseFloat accepts decimal and hexadecimal floating-point numbers
674// as defined by the Go syntax for [floating-point literals].
675// If s is well-formed and near a valid floating-point number,
676// ParseFloat returns the nearest floating-point number rounded
677// using IEEE754 unbiased rounding.
678// (Parsing a hexadecimal floating-point value only rounds when
679// there are more bits in the hexadecimal representation than
680// will fit in the mantissa.)
681//
682// The errors that ParseFloat returns have concrete type *NumError
683// and include err.Num = s.
684//
685// If s is not syntactically well-formed, ParseFloat returns err.Err = ErrSyntax.
686//
687// If s is syntactically well-formed but is more than 1/2 ULP
688// away from the largest floating point number of the given size,
689// ParseFloat returns f = ±Inf, err.Err = ErrRange.
690//
691// ParseFloat recognizes the string "NaN", and the (possibly signed) strings "Inf" and "Infinity"
692// as their respective special floating point values. It ignores case when matching.
693//
694// [floating-point literals]: https://go.dev/ref/spec#Floating-point_literals
695func ParseFloat(s string, bitSize int) (float64, error) {
696	f, n, err := parseFloatPrefix(s, bitSize)
697	if n != len(s) && (err == nil || err.(*NumError).Err != ErrSyntax) {
698		return 0, syntaxError(fnParseFloat, s)
699	}
700	return f, err
701}
702
703func parseFloatPrefix(s string, bitSize int) (float64, int, error) {
704	if bitSize == 32 {
705		f, n, err := atof32(s)
706		return float64(f), n, err
707	}
708	return atof64(s)
709}
710