1 /* -*- Mode: C; c-file-style: "python" -*- */
2 
3 #include <Python.h>
4 #include "pycore_dtoa.h"          // _Py_dg_strtod()
5 #include "pycore_pymath.h"        // _PY_SHORT_FLOAT_REPR
6 #include <locale.h>
7 
/* Prefix comparison used for nan and inf detection.  Returns 1 when the
   beginning of s equals t once the characters of s are folded to lower case
   with Py_TOLOWER, and 0 otherwise.  Only s is folded, so t must already be
   lower-case.  Characters of s beyond the length of t are not examined. */

static int
case_insensitive_match(const char *s, const char *t)
{
    for (; *t != '\0'; s++, t++) {
        /* A NUL in s can never equal the (non-NUL) *t, so running off the
           end of a short s is reported as a mismatch here. */
        if (Py_TOLOWER(*s) != *t) {
            return 0;
        }
    }
    return 1;
}
20 
21 /* _Py_parse_inf_or_nan: Attempt to parse a string of the form "nan", "inf" or
22    "infinity", with an optional leading sign of "+" or "-".  On success,
23    return the NaN or Infinity as a double and set *endptr to point just beyond
24    the successfully parsed portion of the string.  On failure, return -1.0 and
25    set *endptr to point to the start of the string. */
26 
27 #if _PY_SHORT_FLOAT_REPR == 1
28 
/* Variant that obtains the special values from _Py_dg_infinity() and
   _Py_dg_stdnan(), passing the parsed sign as their argument. */
double
_Py_parse_inf_or_nan(const char *p, char **endptr)
{
    const char *cursor = p;
    int is_negative = (*cursor == '-');
    double value;

    /* Step over a single optional sign character. */
    if (is_negative || *cursor == '+') {
        cursor++;
    }

    if (case_insensitive_match(cursor, "inf")) {
        /* Accept "inf", optionally spelled out in full as "infinity". */
        cursor += case_insensitive_match(cursor + 3, "inity") ? 8 : 3;
        value = _Py_dg_infinity(is_negative);
    }
    else if (case_insensitive_match(cursor, "nan")) {
        cursor += 3;
        value = _Py_dg_stdnan(is_negative);
    }
    else {
        /* Not a special value: report failure without consuming anything,
           not even a sign that was skipped above. */
        cursor = p;
        value = -1.0;
    }

    *endptr = (char *)cursor;
    return value;
}
61 
62 #else
63 
64 double
_Py_parse_inf_or_nan(const char * p,char ** endptr)65 _Py_parse_inf_or_nan(const char *p, char **endptr)
66 {
67     double retval;
68     const char *s;
69     int negate = 0;
70 
71     s = p;
72     if (*s == '-') {
73         negate = 1;
74         s++;
75     }
76     else if (*s == '+') {
77         s++;
78     }
79     if (case_insensitive_match(s, "inf")) {
80         s += 3;
81         if (case_insensitive_match(s, "inity"))
82             s += 5;
83         retval = negate ? -Py_HUGE_VAL : Py_HUGE_VAL;
84     }
85     else if (case_insensitive_match(s, "nan")) {
86         s += 3;
87         retval = negate ? -Py_NAN : Py_NAN;
88     }
89     else {
90         s = p;
91         retval = -1.0;
92     }
93     *endptr = (char *)s;
94     return retval;
95 }
96 
97 #endif
98 
/**
 * _PyOS_ascii_strtod:
 * @nptr:    the string to convert to a numeric value.
 * @endptr:  must be non-NULL; it receives a pointer to the character
 *           after the last character used in the conversion.
 *
 * Converts a string to a double value.
 * This function behaves like the standard strtod() function
 * does in the C locale. It does this without actually
 * changing the current locale, since that would not be
 * thread-safe.
 *
 * This function is typically used when reading configuration
 * files or other non-user input that should be locale independent.
 * To handle input from the user you should normally use the
 * locale-sensitive system strtod() function.
 *
 * If the correct value would cause overflow, plus or minus HUGE_VAL
 * is returned (according to the sign of the value), and ERANGE is
 * stored in errno. If the correct value would cause underflow,
 * zero is returned and ERANGE is stored in errno.
 * If memory allocation fails, ENOMEM is stored in errno.
 *
 * This function resets errno before calling strtod() so that
 * you can reliably detect overflow and underflow.
 *
 * Return value: the converted double value.
 **/
127 
128 #if _PY_SHORT_FLOAT_REPR == 1
129 
130 static double
_PyOS_ascii_strtod(const char * nptr,char ** endptr)131 _PyOS_ascii_strtod(const char *nptr, char **endptr)
132 {
133     double result;
134     _Py_SET_53BIT_PRECISION_HEADER;
135 
136     assert(nptr != NULL);
137     /* Set errno to zero, so that we can distinguish zero results
138        and underflows */
139     errno = 0;
140 
141     _Py_SET_53BIT_PRECISION_START;
142     result = _Py_dg_strtod(nptr, endptr);
143     _Py_SET_53BIT_PRECISION_END;
144 
145     if (*endptr == nptr)
146         /* string might represent an inf or nan */
147         result = _Py_parse_inf_or_nan(nptr, endptr);
148 
149     return result;
150 
151 }
152 
153 #else
154 
/*
   Use system strtod;  since strtod is locale aware, we may
   have to first fix the decimal separator.

   Note that unlike _Py_dg_strtod, the system strtod may not always give
   correctly rounded results.

   nptr and endptr must both be non-NULL.  On success *endptr points just
   past the converted text in nptr.  If no conversion can be performed,
   *endptr is set to nptr, errno is set to EINVAL and -1.0 is returned.  If
   the temporary copy of the input cannot be allocated, *endptr is set to
   nptr and errno is set to ENOMEM.
*/

static double
_PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    char *fail_pos;
    double val;
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    const char *p, *decimal_point_pos;
    const char *end = NULL; /* Silence gcc */
    const char *digits_pos = NULL;
    int negate = 0;

    assert(nptr != NULL);

    fail_pos = NULL;

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    decimal_point_pos = NULL;

    /* Parse infinities and nans */
    val = _Py_parse_inf_or_nan(nptr, endptr);
    if (*endptr != nptr) {
        return val;
    }

    /* Set errno to zero, so that we can distinguish zero results
       and underflows */
    errno = 0;

    /* We process the optional sign manually, then pass the remainder to
       the system strtod.  This ensures that the result of an underflow
       has the correct sign. (bug #1725)  */
    p = nptr;
    /* Process leading sign, if present */
    if (*p == '-') {
        negate = 1;
        p++;
    }
    else if (*p == '+') {
        p++;
    }

    /* Some platform strtods accept hex floats; Python shouldn't (at the
       moment), so we check explicitly for strings starting with '0x'. */
    if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X')) {
        goto invalid_string;
    }

    /* Check that what's left begins with a digit or decimal point */
    if (!Py_ISDIGIT(*p) && *p != '.') {
        goto invalid_string;
    }

    digits_pos = p;
    if (decimal_point[0] != '.' ||
        decimal_point[1] != 0)
    {
        /* Look for a '.' in the input; if present, it'll need to be
           swapped for the current locale's decimal point before we
           call strtod.  On the other hand, if we find the current
           locale's decimal point then the input is invalid. */
        while (Py_ISDIGIT(*p)) {
            p++;
        }

        if (*p == '.')
        {
            decimal_point_pos = p++;

            /* locate end of number */
            while (Py_ISDIGIT(*p)) {
                p++;
            }

            if (*p == 'e' || *p == 'E') {
                p++;
            }
            if (*p == '+' || *p == '-') {
                p++;
            }
            while (Py_ISDIGIT(*p)) {
                p++;
            }
            end = p;
        }
        else if (strncmp(p, decimal_point, decimal_point_len) == 0) {
            /* Python bug #1417699 */
            goto invalid_string;
        }
        /* For the other cases, we need not convert the decimal
           point */
    }

    if (decimal_point_pos) {
        char *copy, *c;
        /* Create a copy of the input, with the '.' converted to the
           locale-specific decimal point */
        copy = (char *)PyMem_Malloc(end - digits_pos +
                                    1 + decimal_point_len);
        if (copy == NULL) {
            /* val still holds what the failed inf/nan parse returned. */
            *endptr = (char *)nptr;
            errno = ENOMEM;
            return val;
        }

        c = copy;
        memcpy(c, digits_pos, decimal_point_pos - digits_pos);
        c += decimal_point_pos - digits_pos;
        memcpy(c, decimal_point, decimal_point_len);
        c += decimal_point_len;
        memcpy(c, decimal_point_pos + 1,
               end - (decimal_point_pos + 1));
        c += end - (decimal_point_pos + 1);
        *c = 0;

        val = strtod(copy, &fail_pos);

        if (fail_pos)
        {
            /* Translate the stop position from the copy back into the
               caller's string.  fail_pos points into copy while
               decimal_point_pos points into the input, so the two can only
               be compared as offsets from the start of their own buffers.
               Once strtod has gone past the decimal point, the extra
               (decimal_point_len - 1) bytes that the locale's separator
               occupies in the copy must be subtracted. */
            if (fail_pos - copy > decimal_point_pos - digits_pos) {
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy) -
                    (decimal_point_len - 1);
            }
            else {
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy);
            }
        }

        PyMem_Free(copy);

    }
    else {
        val = strtod(digits_pos, &fail_pos);
    }

    /* strtod consumed nothing after the (optional) sign. */
    if (fail_pos == digits_pos) {
        goto invalid_string;
    }

    /* Apply the sign that was stripped off before calling strtod. */
    if (negate && fail_pos != nptr) {
        val = -val;
    }
    *endptr = fail_pos;

    return val;

  invalid_string:
    *endptr = (char*)nptr;
    errno = EINVAL;
    return -1.0;
}
309 
310 #endif
311 
312 /* PyOS_string_to_double converts a null-terminated byte string s (interpreted
313    as a string of ASCII characters) to a float.  The string should not have
314    leading or trailing whitespace.  The conversion is independent of the
315    current locale.
316 
317    If endptr is NULL, try to convert the whole string.  Raise ValueError and
318    return -1.0 if the string is not a valid representation of a floating-point
319    number.
320 
321    If endptr is non-NULL, try to convert as much of the string as possible.
322    If no initial segment of the string is the valid representation of a
323    floating-point number then *endptr is set to point to the beginning of the
324    string, -1.0 is returned and again ValueError is raised.
325 
326    On overflow (e.g., when trying to convert '1e500' on an IEEE 754 machine),
327    if overflow_exception is NULL then +-Py_HUGE_VAL is returned, and no Python
328    exception is raised.  Otherwise, overflow_exception should point to
329    a Python exception, this exception will be raised, -1.0 will be returned,
330    and *endptr will point just past the end of the converted value.
331 
332    If any other failure occurs (for example lack of memory), -1.0 is returned
333    and the appropriate Python exception will have been set.
334 */
335 
336 double
PyOS_string_to_double(const char * s,char ** endptr,PyObject * overflow_exception)337 PyOS_string_to_double(const char *s,
338                       char **endptr,
339                       PyObject *overflow_exception)
340 {
341     double x, result=-1.0;
342     char *fail_pos;
343 
344     errno = 0;
345     x = _PyOS_ascii_strtod(s, &fail_pos);
346 
347     if (errno == ENOMEM) {
348         PyErr_NoMemory();
349         fail_pos = (char *)s;
350     }
351     else if (!endptr && (fail_pos == s || *fail_pos != '\0'))
352         PyErr_Format(PyExc_ValueError,
353                       "could not convert string to float: "
354                       "'%.200s'", s);
355     else if (fail_pos == s)
356         PyErr_Format(PyExc_ValueError,
357                       "could not convert string to float: "
358                       "'%.200s'", s);
359     else if (errno == ERANGE && fabs(x) >= 1.0 && overflow_exception)
360         PyErr_Format(overflow_exception,
361                       "value too large to convert to float: "
362                       "'%.200s'", s);
363     else
364         result = x;
365 
366     if (endptr != NULL)
367         *endptr = fail_pos;
368     return result;
369 }
370 
371 /* Remove underscores that follow the underscore placement rule from
372    the string and then call the `innerfunc` function on the result.
373    It should return a new object or NULL on exception.
374 
375    `what` is used for the error message emitted when underscores are detected
376    that don't follow the rule. `arg` is an opaque pointer passed to the inner
377    function.
378 
379    This is used to implement underscore-agnostic conversion for floats
380    and complex numbers.
381 */
382 PyObject *
_Py_string_to_number_with_underscores(const char * s,Py_ssize_t orig_len,const char * what,PyObject * obj,void * arg,PyObject * (* innerfunc)(const char *,Py_ssize_t,void *))383 _Py_string_to_number_with_underscores(
384     const char *s, Py_ssize_t orig_len, const char *what, PyObject *obj, void *arg,
385     PyObject *(*innerfunc)(const char *, Py_ssize_t, void *))
386 {
387     char prev;
388     const char *p, *last;
389     char *dup, *end;
390     PyObject *result;
391 
392     assert(s[orig_len] == '\0');
393 
394     if (strchr(s, '_') == NULL) {
395         return innerfunc(s, orig_len, arg);
396     }
397 
398     dup = PyMem_Malloc(orig_len + 1);
399     if (dup == NULL) {
400         return PyErr_NoMemory();
401     }
402     end = dup;
403     prev = '\0';
404     last = s + orig_len;
405     for (p = s; *p; p++) {
406         if (*p == '_') {
407             /* Underscores are only allowed after digits. */
408             if (!(prev >= '0' && prev <= '9')) {
409                 goto error;
410             }
411         }
412         else {
413             *end++ = *p;
414             /* Underscores are only allowed before digits. */
415             if (prev == '_' && !(*p >= '0' && *p <= '9')) {
416                 goto error;
417             }
418         }
419         prev = *p;
420     }
421     /* Underscores are not allowed at the end. */
422     if (prev == '_') {
423         goto error;
424     }
425     /* No embedded NULs allowed. */
426     if (p != last) {
427         goto error;
428     }
429     *end = '\0';
430     result = innerfunc(dup, end - dup, arg);
431     PyMem_Free(dup);
432     return result;
433 
434   error:
435     PyMem_Free(dup);
436     PyErr_Format(PyExc_ValueError,
437                  "could not convert string to %s: "
438                  "%R", what, obj);
439     return NULL;
440 }
441 
442 #if _PY_SHORT_FLOAT_REPR == 0
443 
444 /* Given a string that may have a decimal point in the current
445    locale, change it back to a dot.  Since the string cannot get
446    longer, no need for a maximum buffer size parameter. */
447 Py_LOCAL_INLINE(void)
change_decimal_from_locale_to_dot(char * buffer)448 change_decimal_from_locale_to_dot(char* buffer)
449 {
450     struct lconv *locale_data = localeconv();
451     const char *decimal_point = locale_data->decimal_point;
452 
453     if (decimal_point[0] != '.' || decimal_point[1] != 0) {
454         size_t decimal_point_len = strlen(decimal_point);
455 
456         if (*buffer == '+' || *buffer == '-')
457             buffer++;
458         while (Py_ISDIGIT(*buffer))
459             buffer++;
460         if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
461             *buffer = '.';
462             buffer++;
463             if (decimal_point_len > 1) {
464                 /* buffer needs to get smaller */
465                 size_t rest_len = strlen(buffer +
466                                      (decimal_point_len - 1));
467                 memmove(buffer,
468                     buffer + (decimal_point_len - 1),
469                     rest_len);
470                 buffer[rest_len] = 0;
471             }
472         }
473     }
474 }
475 
476 
477 /* From the C99 standard, section 7.19.6:
478 The exponent always contains at least two digits, and only as many more digits
479 as necessary to represent the exponent.
480 */
481 #define MIN_EXPONENT_DIGITS 2
482 
483 /* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
484    in length. */
485 Py_LOCAL_INLINE(void)
ensure_minimum_exponent_length(char * buffer,size_t buf_size)486 ensure_minimum_exponent_length(char* buffer, size_t buf_size)
487 {
488     char *p = strpbrk(buffer, "eE");
489     if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
490         char *start = p + 2;
491         int exponent_digit_cnt = 0;
492         int leading_zero_cnt = 0;
493         int in_leading_zeros = 1;
494         int significant_digit_cnt;
495 
496         /* Skip over the exponent and the sign. */
497         p += 2;
498 
499         /* Find the end of the exponent, keeping track of leading
500            zeros. */
501         while (*p && Py_ISDIGIT(*p)) {
502             if (in_leading_zeros && *p == '0')
503                 ++leading_zero_cnt;
504             if (*p != '0')
505                 in_leading_zeros = 0;
506             ++p;
507             ++exponent_digit_cnt;
508         }
509 
510         significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
511         if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
512             /* If there are 2 exactly digits, we're done,
513                regardless of what they contain */
514         }
515         else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
516             int extra_zeros_cnt;
517 
518             /* There are more than 2 digits in the exponent.  See
519                if we can delete some of the leading zeros */
520             if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
521                 significant_digit_cnt = MIN_EXPONENT_DIGITS;
522             extra_zeros_cnt = exponent_digit_cnt -
523                 significant_digit_cnt;
524 
525             /* Delete extra_zeros_cnt worth of characters from the
526                front of the exponent */
527             assert(extra_zeros_cnt >= 0);
528 
529             /* Add one to significant_digit_cnt to copy the
530                trailing 0 byte, thus setting the length */
531             memmove(start,
532                 start + extra_zeros_cnt,
533                 significant_digit_cnt + 1);
534         }
535         else {
536             /* If there are fewer than 2 digits, add zeros
537                until there are 2, if there's enough room */
538             int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
539             if (start + zeros + exponent_digit_cnt + 1
540                   < buffer + buf_size) {
541                 memmove(start + zeros, start,
542                     exponent_digit_cnt + 1);
543                 memset(start, '0', zeros);
544             }
545         }
546     }
547 }
548 
549 /* Remove trailing zeros after the decimal point from a numeric string; also
550    remove the decimal point if all digits following it are zero.  The numeric
551    string must end in '\0', and should not have any leading or trailing
552    whitespace.  Assumes that the decimal point is '.'. */
553 Py_LOCAL_INLINE(void)
remove_trailing_zeros(char * buffer)554 remove_trailing_zeros(char *buffer)
555 {
556     char *old_fraction_end, *new_fraction_end, *end, *p;
557 
558     p = buffer;
559     if (*p == '-' || *p == '+')
560         /* Skip leading sign, if present */
561         ++p;
562     while (Py_ISDIGIT(*p))
563         ++p;
564 
565     /* if there's no decimal point there's nothing to do */
566     if (*p++ != '.')
567         return;
568 
569     /* scan any digits after the point */
570     while (Py_ISDIGIT(*p))
571         ++p;
572     old_fraction_end = p;
573 
574     /* scan up to ending '\0' */
575     while (*p != '\0')
576         p++;
577     /* +1 to make sure that we move the null byte as well */
578     end = p+1;
579 
580     /* scan back from fraction_end, looking for removable zeros */
581     p = old_fraction_end;
582     while (*(p-1) == '0')
583         --p;
584     /* and remove point if we've got that far */
585     if (*(p-1) == '.')
586         --p;
587     new_fraction_end = p;
588 
589     memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
590 }
591 
592 /* Ensure that buffer has a decimal point in it.  The decimal point will not
593    be in the current locale, it will always be '.'. Don't add a decimal point
594    if an exponent is present.  Also, convert to exponential notation where
595    adding a '.0' would produce too many significant digits (see issue 5864).
596 
597    Returns a pointer to the fixed buffer, or NULL on failure.
598 */
599 Py_LOCAL_INLINE(char *)
ensure_decimal_point(char * buffer,size_t buf_size,int precision)600 ensure_decimal_point(char* buffer, size_t buf_size, int precision)
601 {
602     int digit_count, insert_count = 0, convert_to_exp = 0;
603     const char *chars_to_insert;
604     char *digits_start;
605 
606     /* search for the first non-digit character */
607     char *p = buffer;
608     if (*p == '-' || *p == '+')
609         /* Skip leading sign, if present.  I think this could only
610            ever be '-', but it can't hurt to check for both. */
611         ++p;
612     digits_start = p;
613     while (*p && Py_ISDIGIT(*p))
614         ++p;
615     digit_count = Py_SAFE_DOWNCAST(p - digits_start, Py_ssize_t, int);
616 
617     if (*p == '.') {
618         if (Py_ISDIGIT(*(p+1))) {
619             /* Nothing to do, we already have a decimal
620                point and a digit after it */
621         }
622         else {
623             /* We have a decimal point, but no following
624                digit.  Insert a zero after the decimal. */
625             /* can't ever get here via PyOS_double_to_string */
626             assert(precision == -1);
627             ++p;
628             chars_to_insert = "0";
629             insert_count = 1;
630         }
631     }
632     else if (!(*p == 'e' || *p == 'E')) {
633         /* Don't add ".0" if we have an exponent. */
634         if (digit_count == precision) {
635             /* issue 5864: don't add a trailing .0 in the case
636                where the '%g'-formatted result already has as many
637                significant digits as were requested.  Switch to
638                exponential notation instead. */
639             convert_to_exp = 1;
640             /* no exponent, no point, and we shouldn't land here
641                for infs and nans, so we must be at the end of the
642                string. */
643             assert(*p == '\0');
644         }
645         else {
646             assert(precision == -1 || digit_count < precision);
647             chars_to_insert = ".0";
648             insert_count = 2;
649         }
650     }
651     if (insert_count) {
652         size_t buf_len = strlen(buffer);
653         if (buf_len + insert_count + 1 >= buf_size) {
654             /* If there is not enough room in the buffer
655                for the additional text, just skip it.  It's
656                not worth generating an error over. */
657         }
658         else {
659             memmove(p + insert_count, p,
660                 buffer + strlen(buffer) - p + 1);
661             memcpy(p, chars_to_insert, insert_count);
662         }
663     }
664     if (convert_to_exp) {
665         int written;
666         size_t buf_avail;
667         p = digits_start;
668         /* insert decimal point */
669         assert(digit_count >= 1);
670         memmove(p+2, p+1, digit_count); /* safe, but overwrites nul */
671         p[1] = '.';
672         p += digit_count+1;
673         assert(p <= buf_size+buffer);
674         buf_avail = buf_size+buffer-p;
675         if (buf_avail == 0)
676             return NULL;
677         /* Add exponent.  It's okay to use lower case 'e': we only
678            arrive here as a result of using the empty format code or
679            repr/str builtins and those never want an upper case 'E' */
680         written = PyOS_snprintf(p, buf_avail, "e%+.02d", digit_count-1);
681         if (!(0 <= written &&
682               written < Py_SAFE_DOWNCAST(buf_avail, size_t, int)))
683             /* output truncated, or something else bad happened */
684             return NULL;
685         remove_trailing_zeros(buffer);
686     }
687     return buffer;
688 }
689 
690 /* see FORMATBUFLEN in unicodeobject.c */
691 #define FLOAT_FORMATBUFLEN 120
692 
693 /**
694  * _PyOS_ascii_formatd:
695  * @buffer: A buffer to place the resulting string in
696  * @buf_size: The length of the buffer.
697  * @format: The printf()-style format to use for the
698  *          code to use for converting.
699  * @d: The #gdouble to convert
700  * @precision: The precision to use when formatting.
701  *
702  * Converts a #gdouble to a string, using the '.' as
703  * decimal point. To format the number you pass in
704  * a printf()-style format string. Allowed conversion
705  * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
706  *
707  * 'Z' is the same as 'g', except it always has a decimal and
708  *     at least one digit after the decimal.
709  *
710  * Return value: The pointer to the buffer with the converted string.
711  * On failure returns NULL but does not set any Python exception.
712  **/
713 static char *
_PyOS_ascii_formatd(char * buffer,size_t buf_size,const char * format,double d,int precision)714 _PyOS_ascii_formatd(char       *buffer,
715                    size_t      buf_size,
716                    const char *format,
717                    double      d,
718                    int         precision)
719 {
720     char format_char;
721     size_t format_len = strlen(format);
722 
723     /* Issue 2264: code 'Z' requires copying the format.  'Z' is 'g', but
724        also with at least one character past the decimal. */
725     char tmp_format[FLOAT_FORMATBUFLEN];
726 
727     /* The last character in the format string must be the format char */
728     format_char = format[format_len - 1];
729 
730     if (format[0] != '%')
731         return NULL;
732 
733     /* I'm not sure why this test is here.  It's ensuring that the format
734        string after the first character doesn't have a single quote, a
735        lowercase l, or a percent. This is the reverse of the commented-out
736        test about 10 lines ago. */
737     if (strpbrk(format + 1, "'l%"))
738         return NULL;
739 
740     /* Also curious about this function is that it accepts format strings
741        like "%xg", which are invalid for floats.  In general, the
742        interface to this function is not very good, but changing it is
743        difficult because it's a public API. */
744 
745     if (!(format_char == 'e' || format_char == 'E' ||
746           format_char == 'f' || format_char == 'F' ||
747           format_char == 'g' || format_char == 'G' ||
748           format_char == 'Z'))
749         return NULL;
750 
751     /* Map 'Z' format_char to 'g', by copying the format string and
752        replacing the final char with a 'g' */
753     if (format_char == 'Z') {
754         if (format_len + 1 >= sizeof(tmp_format)) {
755             /* The format won't fit in our copy.  Error out.  In
756                practice, this will never happen and will be
757                detected by returning NULL */
758             return NULL;
759         }
760         strcpy(tmp_format, format);
761         tmp_format[format_len - 1] = 'g';
762         format = tmp_format;
763     }
764 
765 
766     /* Have PyOS_snprintf do the hard work */
767     PyOS_snprintf(buffer, buf_size, format, d);
768 
769     /* Do various fixups on the return string */
770 
771     /* Get the current locale, and find the decimal point string.
772        Convert that string back to a dot. */
773     change_decimal_from_locale_to_dot(buffer);
774 
775     /* If an exponent exists, ensure that the exponent is at least
776        MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
777        for the extra zeros.  Also, if there are more than
778        MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
779        back to MIN_EXPONENT_DIGITS */
780     ensure_minimum_exponent_length(buffer, buf_size);
781 
782     /* If format_char is 'Z', make sure we have at least one character
783        after the decimal point (and make sure we have a decimal point);
784        also switch to exponential notation in some edge cases where the
785        extra character would produce more significant digits that we
786        really want. */
787     if (format_char == 'Z')
788         buffer = ensure_decimal_point(buffer, buf_size, precision);
789 
790     return buffer;
791 }
792 
793 /* The fallback code to use if _Py_dg_dtoa is not available. */
794 
PyOS_double_to_string(double val,char format_code,int precision,int flags,int * type)795 char * PyOS_double_to_string(double val,
796                                          char format_code,
797                                          int precision,
798                                          int flags,
799                                          int *type)
800 {
801     char format[32];
802     Py_ssize_t bufsize;
803     char *buf;
804     int t, exp;
805     int upper = 0;
806 
807     /* Validate format_code, and map upper and lower case */
808     switch (format_code) {
809     case 'e':          /* exponent */
810     case 'f':          /* fixed */
811     case 'g':          /* general */
812         break;
813     case 'E':
814         upper = 1;
815         format_code = 'e';
816         break;
817     case 'F':
818         upper = 1;
819         format_code = 'f';
820         break;
821     case 'G':
822         upper = 1;
823         format_code = 'g';
824         break;
825     case 'r':          /* repr format */
826         /* Supplied precision is unused, must be 0. */
827         if (precision != 0) {
828             PyErr_BadInternalCall();
829             return NULL;
830         }
831         /* The repr() precision (17 significant decimal digits) is the
832            minimal number that is guaranteed to have enough precision
833            so that if the number is read back in the exact same binary
834            value is recreated.  This is true for IEEE floating point
835            by design, and also happens to work for all other modern
836            hardware. */
837         precision = 17;
838         format_code = 'g';
839         break;
840     default:
841         PyErr_BadInternalCall();
842         return NULL;
843     }
844 
845     /* Here's a quick-and-dirty calculation to figure out how big a buffer
846        we need.  In general, for a finite float we need:
847 
848          1 byte for each digit of the decimal significand, and
849 
850          1 for a possible sign
851          1 for a possible decimal point
852          2 for a possible [eE][+-]
853          1 for each digit of the exponent;  if we allow 19 digits
854            total then we're safe up to exponents of 2**63.
855          1 for the trailing nul byte
856 
857        This gives a total of 24 + the number of digits in the significand,
858        and the number of digits in the significand is:
859 
860          for 'g' format: at most precision, except possibly
861            when precision == 0, when it's 1.
862          for 'e' format: precision+1
863          for 'f' format: precision digits after the point, at least 1
864            before.  To figure out how many digits appear before the point
865            we have to examine the size of the number.  If fabs(val) < 1.0
866            then there will be only one digit before the point.  If
867            fabs(val) >= 1.0, then there are at most
868 
869          1+floor(log10(ceiling(fabs(val))))
870 
871            digits before the point (where the 'ceiling' allows for the
872            possibility that the rounding rounds the integer part of val
873            up).  A safe upper bound for the above quantity is
874            1+floor(exp/3), where exp is the unique integer such that 0.5
875            <= fabs(val)/2**exp < 1.0.  This exp can be obtained from
876            frexp.
877 
878        So we allow room for precision+1 digits for all formats, plus an
879        extra floor(exp/3) digits for 'f' format.
880 
881     */
882 
883     if (Py_IS_NAN(val) || Py_IS_INFINITY(val))
884         /* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */
885         bufsize = 5;
886     else {
887         bufsize = 25 + precision;
888         if (format_code == 'f' && fabs(val) >= 1.0) {
889             frexp(val, &exp);
890             bufsize += exp/3;
891         }
892     }
893 
894     buf = PyMem_Malloc(bufsize);
895     if (buf == NULL) {
896         PyErr_NoMemory();
897         return NULL;
898     }
899 
900     /* Handle nan and inf. */
901     if (Py_IS_NAN(val)) {
902         strcpy(buf, "nan");
903         t = Py_DTST_NAN;
904     } else if (Py_IS_INFINITY(val)) {
905         if (copysign(1., val) == 1.)
906             strcpy(buf, "inf");
907         else
908             strcpy(buf, "-inf");
909         t = Py_DTST_INFINITE;
910     } else {
911         t = Py_DTST_FINITE;
912         if (flags & Py_DTSF_ADD_DOT_0)
913             format_code = 'Z';
914 
915         PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
916                       (flags & Py_DTSF_ALT ? "#" : ""), precision,
917                       format_code);
918         _PyOS_ascii_formatd(buf, bufsize, format, val, precision);
919 
920         if (flags & Py_DTSF_NO_NEG_0 && buf[0] == '-') {
921             char *buf2 = buf + 1;
922             while (*buf2 == '0' || *buf2 == '.') {
923                 ++buf2;
924             }
925             if (*buf2 == 0 || *buf2 == 'e') {
926                 size_t len = buf2 - buf + strlen(buf2);
927                 assert(buf[len] == 0);
928                 memmove(buf, buf+1, len);
929             }
930         }
931     }
932 
933     /* Add sign when requested.  It's convenient (esp. when formatting
934      complex numbers) to include a sign even for inf and nan. */
935     if (flags & Py_DTSF_SIGN && buf[0] != '-') {
936         size_t len = strlen(buf);
937         /* the bufsize calculations above should ensure that we've got
938            space to add a sign */
939         assert((size_t)bufsize >= len+2);
940         memmove(buf+1, buf, len+1);
941         buf[0] = '+';
942     }
943     if (upper) {
944         /* Convert to upper case. */
945         char *p1;
946         for (p1 = buf; *p1; p1++)
947             *p1 = Py_TOUPPER(*p1);
948     }
949 
950     if (type)
951         *type = t;
952     return buf;
953 }
954 
955 #else  // _PY_SHORT_FLOAT_REPR == 1
956 
957 /* _Py_dg_dtoa is available. */
958 
/* I'm using a lookup table here so that I don't have to invent a non-locale
   specific way to convert to uppercase.

   OFS_INF, OFS_NAN and OFS_E are the indices of the "inf", "nan" and
   exponent-marker entries; the same indices are valid for both the
   lower-case and the upper-case table below. */
#define OFS_INF 0
#define OFS_NAN 1
#define OFS_E 2

/* The lengths of these are known to the code below, so don't change them:
   format_float_short() copies exactly 3 bytes from the OFS_INF and OFS_NAN
   entries and exactly 1 byte from the OFS_E entry. */
static const char * const lc_float_strings[] = {
    "inf",
    "nan",
    "e",
};
/* Upper-case counterparts of lc_float_strings, selected by
   PyOS_double_to_string() for the format codes 'E', 'F' and 'G'. */
static const char * const uc_float_strings[] = {
    "INF",
    "NAN",
    "E",
};
976 
977 
978 /* Convert a double d to a string, and return a PyMem_Malloc'd block of
979    memory contain the resulting string.
980 
981    Arguments:
982      d is the double to be converted
983      format_code is one of 'e', 'f', 'g', 'r'.  'e', 'f' and 'g'
984        correspond to '%e', '%f' and '%g';  'r' corresponds to repr.
985      mode is one of '0', '2' or '3', and is completely determined by
986        format_code: 'e' and 'g' use mode 2; 'f' mode 3, 'r' mode 0.
987      precision is the desired precision
988      always_add_sign is nonzero if a '+' sign should be included for positive
989        numbers
990      add_dot_0_if_integer is nonzero if integers in non-exponential form
991        should have ".0" added.  Only applies to format codes 'r' and 'g'.
992      use_alt_formatting is nonzero if alternative formatting should be
993        used.  Only applies to format codes 'e', 'f' and 'g'.  For code 'g',
994        at most one of use_alt_formatting and add_dot_0_if_integer should
995        be nonzero.
996      type, if non-NULL, will be set to one of these constants to identify
997        the type of the 'd' argument:
998      Py_DTST_FINITE
999      Py_DTST_INFINITE
1000      Py_DTST_NAN
1001 
1002    Returns a PyMem_Malloc'd block of memory containing the resulting string,
1003     or NULL on error. If NULL is returned, the Python error has been set.
1004  */
1005 
1006 static char *
format_float_short(double d,char format_code,int mode,int precision,int always_add_sign,int add_dot_0_if_integer,int use_alt_formatting,int no_negative_zero,const char * const * float_strings,int * type)1007 format_float_short(double d, char format_code,
1008                    int mode, int precision,
1009                    int always_add_sign, int add_dot_0_if_integer,
1010                    int use_alt_formatting, int no_negative_zero,
1011                    const char * const *float_strings, int *type)
1012 {
1013     char *buf = NULL;
1014     char *p = NULL;
1015     Py_ssize_t bufsize = 0;
1016     char *digits, *digits_end;
1017     int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0;
1018     Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end;
1019     _Py_SET_53BIT_PRECISION_HEADER;
1020 
1021     /* _Py_dg_dtoa returns a digit string (no decimal point or exponent).
1022        Must be matched by a call to _Py_dg_freedtoa. */
1023     _Py_SET_53BIT_PRECISION_START;
1024     digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign,
1025                          &digits_end);
1026     _Py_SET_53BIT_PRECISION_END;
1027 
1028     decpt = (Py_ssize_t)decpt_as_int;
1029     if (digits == NULL) {
1030         /* The only failure mode is no memory. */
1031         PyErr_NoMemory();
1032         goto exit;
1033     }
1034     assert(digits_end != NULL && digits_end >= digits);
1035     digits_len = digits_end - digits;
1036 
1037     if (no_negative_zero && sign == 1 &&
1038             (digits_len == 0 || (digits_len == 1 && digits[0] == '0'))) {
1039         sign = 0;
1040     }
1041 
1042     if (digits_len && !Py_ISDIGIT(digits[0])) {
1043         /* Infinities and nans here; adapt Gay's output,
1044            so convert Infinity to inf and NaN to nan, and
1045            ignore sign of nan. Then return. */
1046 
1047         /* ignore the actual sign of a nan */
1048         if (digits[0] == 'n' || digits[0] == 'N')
1049             sign = 0;
1050 
1051         /* We only need 5 bytes to hold the result "+inf\0" . */
1052         bufsize = 5; /* Used later in an assert. */
1053         buf = (char *)PyMem_Malloc(bufsize);
1054         if (buf == NULL) {
1055             PyErr_NoMemory();
1056             goto exit;
1057         }
1058         p = buf;
1059 
1060         if (sign == 1) {
1061             *p++ = '-';
1062         }
1063         else if (always_add_sign) {
1064             *p++ = '+';
1065         }
1066         if (digits[0] == 'i' || digits[0] == 'I') {
1067             strncpy(p, float_strings[OFS_INF], 3);
1068             p += 3;
1069 
1070             if (type)
1071                 *type = Py_DTST_INFINITE;
1072         }
1073         else if (digits[0] == 'n' || digits[0] == 'N') {
1074             strncpy(p, float_strings[OFS_NAN], 3);
1075             p += 3;
1076 
1077             if (type)
1078                 *type = Py_DTST_NAN;
1079         }
1080         else {
1081             /* shouldn't get here: Gay's code should always return
1082                something starting with a digit, an 'I',  or 'N' */
1083             Py_UNREACHABLE();
1084         }
1085         goto exit;
1086     }
1087 
1088     /* The result must be finite (not inf or nan). */
1089     if (type)
1090         *type = Py_DTST_FINITE;
1091 
1092 
1093     /* We got digits back, format them.  We may need to pad 'digits'
1094        either on the left or right (or both) with extra zeros, so in
1095        general the resulting string has the form
1096 
1097          [<sign>]<zeros><digits><zeros>[<exponent>]
1098 
1099        where either of the <zeros> pieces could be empty, and there's a
1100        decimal point that could appear either in <digits> or in the
1101        leading or trailing <zeros>.
1102 
1103        Imagine an infinite 'virtual' string vdigits, consisting of the
1104        string 'digits' (starting at index 0) padded on both the left and
1105        right with infinite strings of zeros.  We want to output a slice
1106 
1107          vdigits[vdigits_start : vdigits_end]
1108 
1109        of this virtual string.  Thus if vdigits_start < 0 then we'll end
1110        up producing some leading zeros; if vdigits_end > digits_len there
1111        will be trailing zeros in the output.  The next section of code
1112        determines whether to use an exponent or not, figures out the
1113        position 'decpt' of the decimal point, and computes 'vdigits_start'
1114        and 'vdigits_end'. */
1115     vdigits_end = digits_len;
1116     switch (format_code) {
1117     case 'e':
1118         use_exp = 1;
1119         vdigits_end = precision;
1120         break;
1121     case 'f':
1122         vdigits_end = decpt + precision;
1123         break;
1124     case 'g':
1125         if (decpt <= -4 || decpt >
1126             (add_dot_0_if_integer ? precision-1 : precision))
1127             use_exp = 1;
1128         if (use_alt_formatting)
1129             vdigits_end = precision;
1130         break;
1131     case 'r':
1132         /* convert to exponential format at 1e16.  We used to convert
1133            at 1e17, but that gives odd-looking results for some values
1134            when a 16-digit 'shortest' repr is padded with bogus zeros.
1135            For example, repr(2e16+8) would give 20000000000000010.0;
1136            the true value is 20000000000000008.0. */
1137         if (decpt <= -4 || decpt > 16)
1138             use_exp = 1;
1139         break;
1140     default:
1141         PyErr_BadInternalCall();
1142         goto exit;
1143     }
1144 
1145     /* if using an exponent, reset decimal point position to 1 and adjust
1146        exponent accordingly.*/
1147     if (use_exp) {
1148         exp = (int)decpt - 1;
1149         decpt = 1;
1150     }
1151     /* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
1152        decpt < vdigits_end if add_dot_0_if_integer and no exponent */
1153     vdigits_start = decpt <= 0 ? decpt-1 : 0;
1154     if (!use_exp && add_dot_0_if_integer)
1155         vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
1156     else
1157         vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;
1158 
1159     /* double check inequalities */
1160     assert(vdigits_start <= 0 &&
1161            0 <= digits_len &&
1162            digits_len <= vdigits_end);
1163     /* decimal point should be in (vdigits_start, vdigits_end] */
1164     assert(vdigits_start < decpt && decpt <= vdigits_end);
1165 
1166     /* Compute an upper bound how much memory we need. This might be a few
1167        chars too long, but no big deal. */
1168     bufsize =
1169         /* sign, decimal point and trailing 0 byte */
1170         3 +
1171 
1172         /* total digit count (including zero padding on both sides) */
1173         (vdigits_end - vdigits_start) +
1174 
1175         /* exponent "e+100", max 3 numerical digits */
1176         (use_exp ? 5 : 0);
1177 
1178     /* Now allocate the memory and initialize p to point to the start of
1179        it. */
1180     buf = (char *)PyMem_Malloc(bufsize);
1181     if (buf == NULL) {
1182         PyErr_NoMemory();
1183         goto exit;
1184     }
1185     p = buf;
1186 
1187     /* Add a negative sign if negative, and a plus sign if non-negative
1188        and always_add_sign is true. */
1189     if (sign == 1)
1190         *p++ = '-';
1191     else if (always_add_sign)
1192         *p++ = '+';
1193 
1194     /* note that exactly one of the three 'if' conditions is true,
1195        so we include exactly one decimal point */
1196     /* Zero padding on left of digit string */
1197     if (decpt <= 0) {
1198         memset(p, '0', decpt-vdigits_start);
1199         p += decpt - vdigits_start;
1200         *p++ = '.';
1201         memset(p, '0', 0-decpt);
1202         p += 0-decpt;
1203     }
1204     else {
1205         memset(p, '0', 0-vdigits_start);
1206         p += 0 - vdigits_start;
1207     }
1208 
1209     /* Digits, with included decimal point */
1210     if (0 < decpt && decpt <= digits_len) {
1211         strncpy(p, digits, decpt-0);
1212         p += decpt-0;
1213         *p++ = '.';
1214         strncpy(p, digits+decpt, digits_len-decpt);
1215         p += digits_len-decpt;
1216     }
1217     else {
1218         strncpy(p, digits, digits_len);
1219         p += digits_len;
1220     }
1221 
1222     /* And zeros on the right */
1223     if (digits_len < decpt) {
1224         memset(p, '0', decpt-digits_len);
1225         p += decpt-digits_len;
1226         *p++ = '.';
1227         memset(p, '0', vdigits_end-decpt);
1228         p += vdigits_end-decpt;
1229     }
1230     else {
1231         memset(p, '0', vdigits_end-digits_len);
1232         p += vdigits_end-digits_len;
1233     }
1234 
1235     /* Delete a trailing decimal pt unless using alternative formatting. */
1236     if (p[-1] == '.' && !use_alt_formatting)
1237         p--;
1238 
1239     /* Now that we've done zero padding, add an exponent if needed. */
1240     if (use_exp) {
1241         *p++ = float_strings[OFS_E][0];
1242         exp_len = sprintf(p, "%+.02d", exp);
1243         p += exp_len;
1244     }
1245   exit:
1246     if (buf) {
1247         *p = '\0';
1248         /* It's too late if this fails, as we've already stepped on
1249            memory that isn't ours. But it's an okay debugging test. */
1250         assert(p-buf < bufsize);
1251     }
1252     if (digits)
1253         _Py_dg_freedtoa(digits);
1254 
1255     return buf;
1256 }
1257 
1258 
PyOS_double_to_string(double val,char format_code,int precision,int flags,int * type)1259 char * PyOS_double_to_string(double val,
1260                                          char format_code,
1261                                          int precision,
1262                                          int flags,
1263                                          int *type)
1264 {
1265     const char * const *float_strings = lc_float_strings;
1266     int mode;
1267 
1268     /* Validate format_code, and map upper and lower case. Compute the
1269        mode and make any adjustments as needed. */
1270     switch (format_code) {
1271     /* exponent */
1272     case 'E':
1273         float_strings = uc_float_strings;
1274         format_code = 'e';
1275         /* Fall through. */
1276     case 'e':
1277         mode = 2;
1278         precision++;
1279         break;
1280 
1281     /* fixed */
1282     case 'F':
1283         float_strings = uc_float_strings;
1284         format_code = 'f';
1285         /* Fall through. */
1286     case 'f':
1287         mode = 3;
1288         break;
1289 
1290     /* general */
1291     case 'G':
1292         float_strings = uc_float_strings;
1293         format_code = 'g';
1294         /* Fall through. */
1295     case 'g':
1296         mode = 2;
1297         /* precision 0 makes no sense for 'g' format; interpret as 1 */
1298         if (precision == 0)
1299             precision = 1;
1300         break;
1301 
1302     /* repr format */
1303     case 'r':
1304         mode = 0;
1305         /* Supplied precision is unused, must be 0. */
1306         if (precision != 0) {
1307             PyErr_BadInternalCall();
1308             return NULL;
1309         }
1310         break;
1311 
1312     default:
1313         PyErr_BadInternalCall();
1314         return NULL;
1315     }
1316 
1317     return format_float_short(val, format_code, mode, precision,
1318                               flags & Py_DTSF_SIGN,
1319                               flags & Py_DTSF_ADD_DOT_0,
1320                               flags & Py_DTSF_ALT,
1321                               flags & Py_DTSF_NO_NEG_0,
1322                               float_strings, type);
1323 }
1324 #endif  // _PY_SHORT_FLOAT_REPR == 1
1325