/* implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  that is, the versions
   of int.__format__, etc., that take and return unicode objects */
4 
5 #include "Python.h"
6 #include "pycore_fileutils.h"     // _Py_GetLocaleconvNumeric()
7 #include "pycore_long.h"          // _PyLong_FormatWriter()
8 #include <locale.h>
9 
10 /* Raises an exception about an unknown presentation type for this
11  * type. */
12 
13 static void
unknown_presentation_type(Py_UCS4 presentation_type,const char * type_name)14 unknown_presentation_type(Py_UCS4 presentation_type,
15                           const char* type_name)
16 {
17     /* %c might be out-of-range, hence the two cases. */
18     if (presentation_type > 32 && presentation_type < 128)
19         PyErr_Format(PyExc_ValueError,
20                      "Unknown format code '%c' "
21                      "for object of type '%.200s'",
22                      (char)presentation_type,
23                      type_name);
24     else
25         PyErr_Format(PyExc_ValueError,
26                      "Unknown format code '\\x%x' "
27                      "for object of type '%.200s'",
28                      (unsigned int)presentation_type,
29                      type_name);
30 }
31 
32 static void
invalid_thousands_separator_type(char specifier,Py_UCS4 presentation_type)33 invalid_thousands_separator_type(char specifier, Py_UCS4 presentation_type)
34 {
35     assert(specifier == ',' || specifier == '_');
36     if (presentation_type > 32 && presentation_type < 128)
37         PyErr_Format(PyExc_ValueError,
38                      "Cannot specify '%c' with '%c'.",
39                      specifier, (char)presentation_type);
40     else
41         PyErr_Format(PyExc_ValueError,
42                      "Cannot specify '%c' with '\\x%x'.",
43                      specifier, (unsigned int)presentation_type);
44 }
45 
46 static void
invalid_comma_and_underscore(void)47 invalid_comma_and_underscore(void)
48 {
49     PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
50 }
51 
52 /*
53     get_integer consumes 0 or more decimal digit characters from an
54     input string, updates *result with the corresponding positive
55     integer, and returns the number of digits consumed.
56 
57     returns -1 on error.
58 */
59 static int
get_integer(PyObject * str,Py_ssize_t * ppos,Py_ssize_t end,Py_ssize_t * result)60 get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end,
61                   Py_ssize_t *result)
62 {
63     Py_ssize_t accumulator, digitval, pos = *ppos;
64     int numdigits;
65     int kind = PyUnicode_KIND(str);
66     const void *data = PyUnicode_DATA(str);
67 
68     accumulator = numdigits = 0;
69     for (; pos < end; pos++, numdigits++) {
70         digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos));
71         if (digitval < 0)
72             break;
73         /*
74            Detect possible overflow before it happens:
75 
76               accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
77               accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
78         */
79         if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
80             PyErr_Format(PyExc_ValueError,
81                          "Too many decimal digits in format string");
82             *ppos = pos;
83             return -1;
84         }
85         accumulator = accumulator * 10 + digitval;
86     }
87     *ppos = pos;
88     *result = accumulator;
89     return numdigits;
90 }
91 
92 /************************************************************************/
93 /*********** standard format specifier parsing **************************/
94 /************************************************************************/
95 
96 /* returns true if this character is a specifier alignment token */
97 Py_LOCAL_INLINE(int)
is_alignment_token(Py_UCS4 c)98 is_alignment_token(Py_UCS4 c)
99 {
100     switch (c) {
101     case '<': case '>': case '=': case '^':
102         return 1;
103     default:
104         return 0;
105     }
106 }
107 
108 /* returns true if this character is a sign element */
109 Py_LOCAL_INLINE(int)
is_sign_element(Py_UCS4 c)110 is_sign_element(Py_UCS4 c)
111 {
112     switch (c) {
113     case ' ': case '+': case '-':
114         return 1;
115     default:
116         return 0;
117     }
118 }
119 
/* Locale type codes: they select where get_locale_info() takes the
   decimal point, thousands separator and grouping from.
   LT_NO_LOCALE must be zero, because the parsed
   InternalFormatSpec.thousands_separators field is tested for truth to
   decide whether any separator was requested. */
enum LocaleType {
    /* No thousands separators at all. */
    LT_NO_LOCALE = 0,
    /* ',' separator, groups of three.  The value is the separator
       character itself, so it can be passed on as the `specifier`
       character in error messages. */
    LT_DEFAULT_LOCALE = ',',
    /* '_' separator, groups of three.  Same value convention as
       LT_DEFAULT_LOCALE. */
    LT_UNDERSCORE_LOCALE = '_',
    /* '_' separator, groups of four; substituted for
       LT_UNDERSCORE_LOCALE when the presentation type is b/o/x/X. */
    LT_UNDER_FOUR_LOCALE,
    /* Take the settings from the C library's localeconv(). */
    LT_CURRENT_LOCALE
};
128 
/* Parsed form of a standard format specifier, as filled in by
   parse_internal_render_format_spec(). */
typedef struct {
    /* Padding character; ' ' unless a fill character or the '0' flag
       was given. */
    Py_UCS4 fill_char;
    /* Alignment character ('<', '>', '=' or '^' when given in the
       specifier); otherwise the caller-supplied default alignment. */
    Py_UCS4 align;
    /* 1 if the '#' flag was present, else 0. */
    int alternate;
    /* 1 if the 'z' flag (negative zero coercion) was present, else 0. */
    int no_neg_0;
    /* ' ', '+' or '-' when given; '\0' when no sign option was given. */
    Py_UCS4 sign;
    /* Minimum field width; -1 when not specified. */
    Py_ssize_t width;
    /* Which thousands separator was requested; LT_NO_LOCALE if none. */
    enum LocaleType thousands_separators;
    /* Precision given after '.'; -1 when not specified. */
    Py_ssize_t precision;
    /* Presentation type character; the caller-supplied default type
       when the specifier does not name one. */
    Py_UCS4 type;
} InternalFormatSpec;
140 
141 
/*
  Parse the standard format specifier format_spec[start:end] (Python
  slice notation: start and end are character indexes, end exclusive)
  and fill in *format with the parsed information.

  obj is the object being formatted; it is only consulted for its type
  name when building the "Invalid format specifier" error message.
  default_type and default_align are stored in format->type and
  format->align when the specifier does not supply its own.

  The parts are recognised in this fixed order, each one optional:

      [[fill]align] [sign] ['z'] ['#'] ['0'] [width] [',' | '_']
      ['.' precision] [type]

  returns 1 on success, 0 on failure.
  if failure, sets the exception
*/
static int
parse_internal_render_format_spec(PyObject *obj,
                                  PyObject *format_spec,
                                  Py_ssize_t start, Py_ssize_t end,
                                  InternalFormatSpec *format,
                                  char default_type,
                                  char default_align)
{
    Py_ssize_t pos = start;
    int kind = PyUnicode_KIND(format_spec);
    const void *data = PyUnicode_DATA(format_spec);
    /* end-pos is used throughout this code to specify the length of
       the input string that is still unparsed */
    /* READ_spec reads one character of format_spec; it relies on the
       local variables kind and data declared above. */
#define READ_spec(index) PyUnicode_READ(kind, data, index)

    Py_ssize_t consumed;
    int align_specified = 0;
    int fill_char_specified = 0;

    /* Start from the defaults; every field below is overwritten only
       if the corresponding part is present in the specifier. */
    format->fill_char = ' ';
    format->align = default_align;
    format->alternate = 0;
    format->no_neg_0 = 0;
    format->sign = '\0';
    format->width = -1;
    format->thousands_separators = LT_NO_LOCALE;
    format->precision = -1;
    format->type = default_type;

    /* If the second char is an alignment token,
       then parse the fill char */
    if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
        format->align = READ_spec(pos+1);
        format->fill_char = READ_spec(pos);
        fill_char_specified = 1;
        align_specified = 1;
        pos += 2;
    }
    /* Otherwise the first char may be an alignment token on its own. */
    else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
        format->align = READ_spec(pos);
        align_specified = 1;
        ++pos;
    }

    /* Parse the various sign options */
    if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
        format->sign = READ_spec(pos);
        ++pos;
    }

    /* If the next character is z, request coercion of negative 0.
       Applies only to floats. */
    if (end-pos >= 1 && READ_spec(pos) == 'z') {
        format->no_neg_0 = 1;
        ++pos;
    }

    /* If the next character is #, we're in alternate mode.  This only
       applies to integers. */
    if (end-pos >= 1 && READ_spec(pos) == '#') {
        format->alternate = 1;
        ++pos;
    }

    /* The special case for 0-padding (backwards compat).  It is only
       honoured when no explicit fill character was given; the
       alignment becomes '=' only if no alignment was given and the
       default alignment is '>'. */
    if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') {
        format->fill_char = '0';
        if (!align_specified && default_align == '>') {
            format->align = '=';
        }
        ++pos;
    }

    /* Parse the field width. */
    consumed = get_integer(format_spec, &pos, end, &format->width);
    if (consumed == -1)
        /* Overflow error. Exception already set. */
        return 0;

    /* If consumed is 0, we didn't consume any characters for the
       width. In that case, reset the width to -1, because
       get_integer() will have set it to zero. -1 is how we record
       that the width wasn't specified. */
    if (consumed == 0)
        format->width = -1;

    /* Comma signifies add thousands separators */
    if (end-pos && READ_spec(pos) == ',') {
        format->thousands_separators = LT_DEFAULT_LOCALE;
        ++pos;
    }
    /* Underscore signifies add thousands separators */
    if (end-pos && READ_spec(pos) == '_') {
        /* A ',' was already consumed just above: reject ",_". */
        if (format->thousands_separators != LT_NO_LOCALE) {
            invalid_comma_and_underscore();
            return 0;
        }
        format->thousands_separators = LT_UNDERSCORE_LOCALE;
        ++pos;
    }
    /* A ',' at this point can only follow a '_' consumed just above
       (or a first ','): reject the "_," ordering as well.  The
       character is not consumed here. */
    if (end-pos && READ_spec(pos) == ',') {
        if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
            invalid_comma_and_underscore();
            return 0;
        }
    }

    /* Parse field precision */
    if (end-pos && READ_spec(pos) == '.') {
        ++pos;

        consumed = get_integer(format_spec, &pos, end, &format->precision);
        if (consumed == -1)
            /* Overflow error. Exception already set. */
            return 0;

        /* Not having a precision after a dot is an error. */
        if (consumed == 0) {
            PyErr_Format(PyExc_ValueError,
                         "Format specifier missing precision");
            return 0;
        }

    }

    /* Finally, parse the type field. */

    if (end-pos > 1) {
        /* More than one char remains, so this is an invalid format
           specifier. */
        /* Create a temporary object that contains the format spec we're
           operating on.  It's format_spec[start:end] (in Python syntax). */
        PyObject* actual_format_spec = PyUnicode_FromKindAndData(kind,
                                         (char*)data + kind*start,
                                         end-start);
        /* If the temporary could not be created we still fail below;
           presumably PyUnicode_FromKindAndData() has set its own
           exception in that case. */
        if (actual_format_spec != NULL) {
            PyErr_Format(PyExc_ValueError,
                "Invalid format specifier '%U' for object of type '%.200s'",
                actual_format_spec, Py_TYPE(obj)->tp_name);
            Py_DECREF(actual_format_spec);
        }
        return 0;
    }

    /* Exactly one char left: it is the presentation type. */
    if (end-pos == 1) {
        format->type = READ_spec(pos);
        ++pos;
    }

    /* Do as much validating as we can, just by looking at the format
       specifier.  Do not take into account what type of formatting
       we're doing (int, float, string). */

    if (format->thousands_separators) {
        switch (format->type) {
        case 'd':
        case 'e':
        case 'f':
        case 'g':
        case 'E':
        case 'G':
        case '%':
        case 'F':
        case '\0':
            /* These are allowed. See PEP 378.*/
            break;
        case 'b':
        case 'o':
        case 'x':
        case 'X':
            /* Underscores are allowed in bin/oct/hex. See PEP 515. */
            if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
                /* Every four digits, not every three, in bin/oct/hex. */
                format->thousands_separators = LT_UNDER_FOUR_LOCALE;
                break;
            }
            /* fall through */
        default:
            /* thousands_separators is ',' or '_' here, so its value
               doubles as the specifier character for the message. */
            invalid_thousands_separator_type(format->thousands_separators, format->type);
            return 0;
        }
    }

    assert (format->align <= 127);
    assert (format->sign <= 127);
    return 1;
}
334 
335 /* Calculate the padding needed. */
336 static void
calc_padding(Py_ssize_t nchars,Py_ssize_t width,Py_UCS4 align,Py_ssize_t * n_lpadding,Py_ssize_t * n_rpadding,Py_ssize_t * n_total)337 calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
338              Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
339              Py_ssize_t *n_total)
340 {
341     if (width >= 0) {
342         if (nchars > width)
343             *n_total = nchars;
344         else
345             *n_total = width;
346     }
347     else {
348         /* not specified, use all of the chars and no more */
349         *n_total = nchars;
350     }
351 
352     /* Figure out how much leading space we need, based on the
353        aligning */
354     if (align == '>')
355         *n_lpadding = *n_total - nchars;
356     else if (align == '^')
357         *n_lpadding = (*n_total - nchars) / 2;
358     else if (align == '<' || align == '=')
359         *n_lpadding = 0;
360     else {
361         /* We should never have an unspecified alignment. */
362         Py_UNREACHABLE();
363     }
364 
365     *n_rpadding = *n_total - nchars - *n_lpadding;
366 }
367 
368 /* Do the padding, and return a pointer to where the caller-supplied
369    content goes. */
370 static int
fill_padding(_PyUnicodeWriter * writer,Py_ssize_t nchars,Py_UCS4 fill_char,Py_ssize_t n_lpadding,Py_ssize_t n_rpadding)371 fill_padding(_PyUnicodeWriter *writer,
372              Py_ssize_t nchars,
373              Py_UCS4 fill_char, Py_ssize_t n_lpadding,
374              Py_ssize_t n_rpadding)
375 {
376     Py_ssize_t pos;
377 
378     /* Pad on left. */
379     if (n_lpadding) {
380         pos = writer->pos;
381         _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
382     }
383 
384     /* Pad on right. */
385     if (n_rpadding) {
386         pos = writer->pos + nchars + n_lpadding;
387         _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
388     }
389 
390     /* Pointer to the user content. */
391     writer->pos += n_lpadding;
392     return 0;
393 }
394 
395 /************************************************************************/
396 /*********** common routines for numeric formatting *********************/
397 /************************************************************************/
398 
/* Locale info needed for formatting integers and the part of floats
   before and including the decimal. Note that locales only support
   8-bit chars, not unicode.
   NOTE(review): decimal_point and thousands_sep are PyObject* here, so
   the 8-bit remark above may be out of date -- confirm.

   A LocaleInfo is filled in by get_locale_info() and released by
   free_locale_info(). */
typedef struct {
    /* Object holding the decimal point; released with Py_XDECREF. */
    PyObject *decimal_point;
    /* Object holding the thousands separator (an empty string for
       LT_NO_LOCALE); released with Py_XDECREF. */
    PyObject *thousands_sep;
    /* Grouping description handed to
       _PyUnicode_InsertThousandsGrouping().  Points either at
       grouping_buffer or at static storage; never freed through this
       pointer. */
    const char *grouping;
    /* Private copy of the localeconv() grouping string, set only for
       LT_CURRENT_LOCALE; released with PyMem_Free. */
    char *grouping_buffer;
} LocaleInfo;

/* Initializer that sets all four members to zero/NULL. */
#define LocaleInfo_STATIC_INIT {0, 0, 0, 0}
410 
/* describes the layout for an integer, see the comment in
   calc_number_widths() for details.  Filled in by calc_number_widths()
   and consumed by fill_number(). */
typedef struct {
    Py_ssize_t n_lpadding;  /* fill chars written before the sign */
    Py_ssize_t n_prefix;    /* chars in the prefix (e.g., '0x') */
    Py_ssize_t n_spadding;  /* fill chars written between the prefix and
                               the digits (used for '=' alignment) */
    Py_ssize_t n_rpadding;  /* fill chars written after the remainder */
    char sign;              /* sign character to write: '+', '-' or ' ';
                               '\0' when there is none */
    Py_ssize_t n_sign;      /* number of characters needed for sign (0/1) */
    Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
                                    any grouping chars. */
    Py_ssize_t n_decimal;   /* 0 if only an integer */
    Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
                               excluding the decimal itself, if
                               present. */

    /* These 2 are not the widths of fields, but are passed on to
       _PyUnicode_InsertThousandsGrouping(). */
    Py_ssize_t n_digits;    /* The number of digits before a decimal
                               or exponent. */
    Py_ssize_t n_min_width; /* The min_width we used when we computed
                               the n_grouped_digits width. */
} NumberFieldWidths;
434 
435 
436 /* Given a number of the form:
437    digits[remainder]
438    where ptr points to the start and end points to the end, find where
439     the integer part ends. This could be a decimal, an exponent, both,
440     or neither.
441    If a decimal point is present, set *has_decimal and increment
442     remainder beyond it.
443    Results are undefined (but shouldn't crash) for improperly
444     formatted strings.
445 */
446 static void
parse_number(PyObject * s,Py_ssize_t pos,Py_ssize_t end,Py_ssize_t * n_remainder,int * has_decimal)447 parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
448              Py_ssize_t *n_remainder, int *has_decimal)
449 {
450     Py_ssize_t remainder;
451     int kind = PyUnicode_KIND(s);
452     const void *data = PyUnicode_DATA(s);
453 
454     while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))
455         ++pos;
456     remainder = pos;
457 
458     /* Does remainder start with a decimal point? */
459     *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';
460 
461     /* Skip the decimal point. */
462     if (*has_decimal)
463         remainder++;
464 
465     *n_remainder = end - remainder;
466 }
467 
468 /* not all fields of format are used.  for example, precision is
469    unused.  should this take discrete params in order to be more clear
470    about what it does?  or is passing a single format parameter easier
471    and more efficient enough to justify a little obfuscation?
472    Return -1 on error. */
473 static Py_ssize_t
calc_number_widths(NumberFieldWidths * spec,Py_ssize_t n_prefix,Py_UCS4 sign_char,Py_ssize_t n_start,Py_ssize_t n_end,Py_ssize_t n_remainder,int has_decimal,const LocaleInfo * locale,const InternalFormatSpec * format,Py_UCS4 * maxchar)474 calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
475                    Py_UCS4 sign_char, Py_ssize_t n_start,
476                    Py_ssize_t n_end, Py_ssize_t n_remainder,
477                    int has_decimal, const LocaleInfo *locale,
478                    const InternalFormatSpec *format, Py_UCS4 *maxchar)
479 {
480     Py_ssize_t n_non_digit_non_padding;
481     Py_ssize_t n_padding;
482 
483     spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
484     spec->n_lpadding = 0;
485     spec->n_prefix = n_prefix;
486     spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
487     spec->n_remainder = n_remainder;
488     spec->n_spadding = 0;
489     spec->n_rpadding = 0;
490     spec->sign = '\0';
491     spec->n_sign = 0;
492 
493     /* the output will look like:
494        |                                                                                         |
495        | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
496        |                                                                                         |
497 
498        sign is computed from format->sign and the actual
499        sign of the number
500 
501        prefix is given (it's for the '0x' prefix)
502 
503        digits is already known
504 
505        the total width is either given, or computed from the
506        actual digits
507 
508        only one of lpadding, spadding, and rpadding can be non-zero,
509        and it's calculated from the width and other fields
510     */
511 
512     /* compute the various parts we're going to write */
513     switch (format->sign) {
514     case '+':
515         /* always put a + or - */
516         spec->n_sign = 1;
517         spec->sign = (sign_char == '-' ? '-' : '+');
518         break;
519     case ' ':
520         spec->n_sign = 1;
521         spec->sign = (sign_char == '-' ? '-' : ' ');
522         break;
523     default:
524         /* Not specified, or the default (-) */
525         if (sign_char == '-') {
526             spec->n_sign = 1;
527             spec->sign = '-';
528         }
529     }
530 
531     /* The number of chars used for non-digits and non-padding. */
532     n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
533         spec->n_remainder;
534 
535     /* min_width can go negative, that's okay. format->width == -1 means
536        we don't care. */
537     if (format->fill_char == '0' && format->align == '=')
538         spec->n_min_width = format->width - n_non_digit_non_padding;
539     else
540         spec->n_min_width = 0;
541 
542     if (spec->n_digits == 0)
543         /* This case only occurs when using 'c' formatting, we need
544            to special case it because the grouping code always wants
545            to have at least one character. */
546         spec->n_grouped_digits = 0;
547     else {
548         Py_UCS4 grouping_maxchar;
549         spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
550             NULL, 0,
551             NULL, 0, spec->n_digits,
552             spec->n_min_width,
553             locale->grouping, locale->thousands_sep, &grouping_maxchar);
554         if (spec->n_grouped_digits == -1) {
555             return -1;
556         }
557         *maxchar = Py_MAX(*maxchar, grouping_maxchar);
558     }
559 
560     /* Given the desired width and the total of digit and non-digit
561        space we consume, see if we need any padding. format->width can
562        be negative (meaning no padding), but this code still works in
563        that case. */
564     n_padding = format->width -
565                         (n_non_digit_non_padding + spec->n_grouped_digits);
566     if (n_padding > 0) {
567         /* Some padding is needed. Determine if it's left, space, or right. */
568         switch (format->align) {
569         case '<':
570             spec->n_rpadding = n_padding;
571             break;
572         case '^':
573             spec->n_lpadding = n_padding / 2;
574             spec->n_rpadding = n_padding - spec->n_lpadding;
575             break;
576         case '=':
577             spec->n_spadding = n_padding;
578             break;
579         case '>':
580             spec->n_lpadding = n_padding;
581             break;
582         default:
583             /* Shouldn't get here */
584             Py_UNREACHABLE();
585         }
586     }
587 
588     if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
589         *maxchar = Py_MAX(*maxchar, format->fill_char);
590 
591     if (spec->n_decimal)
592         *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
593 
594     return spec->n_lpadding + spec->n_sign + spec->n_prefix +
595         spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
596         spec->n_remainder + spec->n_rpadding;
597 }
598 
/* Fill in the digit parts of a number's string representation,
   as determined in calc_number_widths().

   The pieces are written at writer->pos in this order, advancing
   writer->pos after each one: left padding, sign, prefix, sign-aware
   padding, grouped digits, decimal point, remainder, right padding.

   digits/d_start locate the source characters of the number, and
   prefix/p_start locate the source characters of the prefix.  When
   toupper is true the prefix and the grouped digits are upper-cased
   in place after being copied.

   NOTE(review): the code writes straight into writer->buffer, so it
   presumably relies on the caller having prepared the writer with
   enough room -- confirm against the callers.

   Return -1 on error, or 0 on success. */
static int
fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
            PyObject *digits, Py_ssize_t d_start,
            PyObject *prefix, Py_ssize_t p_start,
            Py_UCS4 fill_char,
            LocaleInfo *locale, int toupper)
{
    /* Used to keep track of digits, decimal, and remainder. */
    Py_ssize_t d_pos = d_start;
    const unsigned int kind = writer->kind;
    const void *data = writer->data;
    Py_ssize_t r;

    /* Left padding. */
    if (spec->n_lpadding) {
        _PyUnicode_FastFill(writer->buffer,
                            writer->pos, spec->n_lpadding, fill_char);
        writer->pos += spec->n_lpadding;
    }
    /* Sign character. */
    if (spec->n_sign == 1) {
        PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
        writer->pos++;
    }
    /* Prefix, copied from the prefix string and optionally
       upper-cased in the output. */
    if (spec->n_prefix) {
        _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
                                      prefix, p_start,
                                      spec->n_prefix);
        if (toupper) {
            Py_ssize_t t;
            for (t = 0; t < spec->n_prefix; t++) {
                Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
                c = Py_TOUPPER(c);
                assert (c <= 127);
                PyUnicode_WRITE(kind, data, writer->pos + t, c);
            }
        }
        writer->pos += spec->n_prefix;
    }
    /* Padding between the prefix and the digits. */
    if (spec->n_spadding) {
        _PyUnicode_FastFill(writer->buffer,
                            writer->pos, spec->n_spadding, fill_char);
        writer->pos += spec->n_spadding;
    }

    /* Only for type 'c' special case, it has no digits. */
    if (spec->n_digits != 0) {
        /* Fill the digits with InsertThousandsGrouping. */
        r = _PyUnicode_InsertThousandsGrouping(
                writer, spec->n_grouped_digits,
                digits, d_pos, spec->n_digits,
                spec->n_min_width,
                locale->grouping, locale->thousands_sep, NULL);
        if (r == -1)
            return -1;
        assert(r == spec->n_grouped_digits);
        d_pos += spec->n_digits;
    }
    /* Upper-case the grouped digits in place; writer->pos still points
       at their first character because it is only advanced below. */
    if (toupper) {
        Py_ssize_t t;
        for (t = 0; t < spec->n_grouped_digits; t++) {
            Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
            c = Py_TOUPPER(c);
            if (c > 127) {
                PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
                return -1;
            }
            PyUnicode_WRITE(kind, data, writer->pos + t, c);
        }
    }
    writer->pos += spec->n_grouped_digits;

    /* Decimal point: n_decimal characters are copied from the locale's
       decimal point, while the source position only moves by one. */
    if (spec->n_decimal) {
        _PyUnicode_FastCopyCharacters(
            writer->buffer, writer->pos,
            locale->decimal_point, 0, spec->n_decimal);
        writer->pos += spec->n_decimal;
        d_pos += 1;
    }

    /* Remainder, copied verbatim from the source digits. */
    if (spec->n_remainder) {
        _PyUnicode_FastCopyCharacters(
            writer->buffer, writer->pos,
            digits, d_pos, spec->n_remainder);
        writer->pos += spec->n_remainder;
        /* d_pos += spec->n_remainder; */
    }

    /* Right padding. */
    if (spec->n_rpadding) {
        _PyUnicode_FastFill(writer->buffer,
                            writer->pos, spec->n_rpadding,
                            fill_char);
        writer->pos += spec->n_rpadding;
    }
    return 0;
}
696 
/* Grouping description used for LT_NO_LOCALE.  It follows the
   `grouping` convention of the C library's struct lconv, where a
   CHAR_MAX element means that no further grouping is performed. */
static const char no_grouping[1] = {CHAR_MAX};
698 
699 /* Find the decimal point character(s?), thousands_separator(s?), and
700    grouping description, either for the current locale if type is
701    LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
702    LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
703 static int
get_locale_info(enum LocaleType type,LocaleInfo * locale_info)704 get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
705 {
706     switch (type) {
707     case LT_CURRENT_LOCALE: {
708         struct lconv *lc = localeconv();
709         if (_Py_GetLocaleconvNumeric(lc,
710                                      &locale_info->decimal_point,
711                                      &locale_info->thousands_sep) < 0) {
712             return -1;
713         }
714 
715         /* localeconv() grouping can become a dangling pointer or point
716            to a different string if another thread calls localeconv() during
717            the string formatting. Copy the string to avoid this risk. */
718         locale_info->grouping_buffer = _PyMem_Strdup(lc->grouping);
719         if (locale_info->grouping_buffer == NULL) {
720             PyErr_NoMemory();
721             return -1;
722         }
723         locale_info->grouping = locale_info->grouping_buffer;
724         break;
725     }
726     case LT_DEFAULT_LOCALE:
727     case LT_UNDERSCORE_LOCALE:
728     case LT_UNDER_FOUR_LOCALE:
729         locale_info->decimal_point = PyUnicode_FromOrdinal('.');
730         locale_info->thousands_sep = PyUnicode_FromOrdinal(
731             type == LT_DEFAULT_LOCALE ? ',' : '_');
732         if (!locale_info->decimal_point || !locale_info->thousands_sep)
733             return -1;
734         if (type != LT_UNDER_FOUR_LOCALE)
735             locale_info->grouping = "\3"; /* Group every 3 characters.  The
736                                          (implicit) trailing 0 means repeat
737                                          infinitely. */
738         else
739             locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
740         break;
741     case LT_NO_LOCALE:
742         locale_info->decimal_point = PyUnicode_FromOrdinal('.');
743         locale_info->thousands_sep = PyUnicode_New(0, 0);
744         if (!locale_info->decimal_point || !locale_info->thousands_sep)
745             return -1;
746         locale_info->grouping = no_grouping;
747         break;
748     }
749     return 0;
750 }
751 
752 static void
free_locale_info(LocaleInfo * locale_info)753 free_locale_info(LocaleInfo *locale_info)
754 {
755     Py_XDECREF(locale_info->decimal_point);
756     Py_XDECREF(locale_info->thousands_sep);
757     PyMem_Free(locale_info->grouping_buffer);
758 }
759 
760 /************************************************************************/
761 /*********** string formatting ******************************************/
762 /************************************************************************/
763 
764 static int
format_string_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)765 format_string_internal(PyObject *value, const InternalFormatSpec *format,
766                        _PyUnicodeWriter *writer)
767 {
768     Py_ssize_t lpad;
769     Py_ssize_t rpad;
770     Py_ssize_t total;
771     Py_ssize_t len;
772     int result = -1;
773     Py_UCS4 maxchar;
774 
775     assert(PyUnicode_IS_READY(value));
776     len = PyUnicode_GET_LENGTH(value);
777 
778     /* sign is not allowed on strings */
779     if (format->sign != '\0') {
780         if (format->sign == ' ') {
781             PyErr_SetString(PyExc_ValueError,
782                 "Space not allowed in string format specifier");
783         }
784         else {
785             PyErr_SetString(PyExc_ValueError,
786                 "Sign not allowed in string format specifier");
787         }
788         goto done;
789     }
790 
791     /* negative 0 coercion is not allowed on strings */
792     if (format->no_neg_0) {
793         PyErr_SetString(PyExc_ValueError,
794                         "Negative zero coercion (z) not allowed in string format "
795                         "specifier");
796         goto done;
797     }
798 
799     /* alternate is not allowed on strings */
800     if (format->alternate) {
801         PyErr_SetString(PyExc_ValueError,
802                         "Alternate form (#) not allowed in string format "
803                         "specifier");
804         goto done;
805     }
806 
807     /* '=' alignment not allowed on strings */
808     if (format->align == '=') {
809         PyErr_SetString(PyExc_ValueError,
810                         "'=' alignment not allowed "
811                         "in string format specifier");
812         goto done;
813     }
814 
815     if ((format->width == -1 || format->width <= len)
816         && (format->precision == -1 || format->precision >= len)) {
817         /* Fast path */
818         return _PyUnicodeWriter_WriteStr(writer, value);
819     }
820 
821     /* if precision is specified, output no more that format.precision
822        characters */
823     if (format->precision >= 0 && len >= format->precision) {
824         len = format->precision;
825     }
826 
827     calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
828 
829     maxchar = writer->maxchar;
830     if (lpad != 0 || rpad != 0)
831         maxchar = Py_MAX(maxchar, format->fill_char);
832     if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
833         Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len);
834         maxchar = Py_MAX(maxchar, valmaxchar);
835     }
836 
837     /* allocate the resulting string */
838     if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
839         goto done;
840 
841     /* Write into that space. First the padding. */
842     result = fill_padding(writer, len, format->fill_char, lpad, rpad);
843     if (result == -1)
844         goto done;
845 
846     /* Then the source string. */
847     if (len) {
848         _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
849                                       value, 0, len);
850     }
851     writer->pos += (len + rpad);
852     result = 0;
853 
854 done:
855     return result;
856 }
857 
858 
859 /************************************************************************/
860 /*********** long formatting ********************************************/
861 /************************************************************************/
862 
863 static int
format_long_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)864 format_long_internal(PyObject *value, const InternalFormatSpec *format,
865                      _PyUnicodeWriter *writer)
866 {
867     int result = -1;
868     Py_UCS4 maxchar = 127;
869     PyObject *tmp = NULL;
870     Py_ssize_t inumeric_chars;
871     Py_UCS4 sign_char = '\0';
872     Py_ssize_t n_digits;       /* count of digits need from the computed
873                                   string */
874     Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
875                                    produces non-digits */
876     Py_ssize_t n_prefix = 0;   /* Count of prefix chars, (e.g., '0x') */
877     Py_ssize_t n_total;
878     Py_ssize_t prefix = 0;
879     NumberFieldWidths spec;
880     long x;
881 
882     /* Locale settings, either from the actual locale or
883        from a hard-code pseudo-locale */
884     LocaleInfo locale = LocaleInfo_STATIC_INIT;
885 
886     /* no precision allowed on integers */
887     if (format->precision != -1) {
888         PyErr_SetString(PyExc_ValueError,
889                         "Precision not allowed in integer format specifier");
890         goto done;
891     }
892     /* no negative zero coercion on integers */
893     if (format->no_neg_0) {
894         PyErr_SetString(PyExc_ValueError,
895                         "Negative zero coercion (z) not allowed in integer"
896                         " format specifier");
897         goto done;
898     }
899 
900     /* special case for character formatting */
901     if (format->type == 'c') {
902         /* error to specify a sign */
903         if (format->sign != '\0') {
904             PyErr_SetString(PyExc_ValueError,
905                             "Sign not allowed with integer"
906                             " format specifier 'c'");
907             goto done;
908         }
909         /* error to request alternate format */
910         if (format->alternate) {
911             PyErr_SetString(PyExc_ValueError,
912                             "Alternate form (#) not allowed with integer"
913                             " format specifier 'c'");
914             goto done;
915         }
916 
917         /* taken from unicodeobject.c formatchar() */
918         /* Integer input truncated to a character */
919         x = PyLong_AsLong(value);
920         if (x == -1 && PyErr_Occurred())
921             goto done;
922         if (x < 0 || x > 0x10ffff) {
923             PyErr_SetString(PyExc_OverflowError,
924                             "%c arg not in range(0x110000)");
925             goto done;
926         }
927         tmp = PyUnicode_FromOrdinal(x);
928         inumeric_chars = 0;
929         n_digits = 1;
930         maxchar = Py_MAX(maxchar, (Py_UCS4)x);
931 
932         /* As a sort-of hack, we tell calc_number_widths that we only
933            have "remainder" characters. calc_number_widths thinks
934            these are characters that don't get formatted, only copied
935            into the output string. We do this for 'c' formatting,
936            because the characters are likely to be non-digits. */
937         n_remainder = 1;
938     }
939     else {
940         int base;
941         int leading_chars_to_skip = 0;  /* Number of characters added by
942                                            PyNumber_ToBase that we want to
943                                            skip over. */
944 
945         /* Compute the base and how many characters will be added by
946            PyNumber_ToBase */
947         switch (format->type) {
948         case 'b':
949             base = 2;
950             leading_chars_to_skip = 2; /* 0b */
951             break;
952         case 'o':
953             base = 8;
954             leading_chars_to_skip = 2; /* 0o */
955             break;
956         case 'x':
957         case 'X':
958             base = 16;
959             leading_chars_to_skip = 2; /* 0x */
960             break;
961         default:  /* shouldn't be needed, but stops a compiler warning */
962         case 'd':
963         case 'n':
964             base = 10;
965             break;
966         }
967 
968         if (format->sign != '+' && format->sign != ' '
969             && format->width == -1
970             && format->type != 'X' && format->type != 'n'
971             && !format->thousands_separators
972             && PyLong_CheckExact(value))
973         {
974             /* Fast path */
975             return _PyLong_FormatWriter(writer, value, base, format->alternate);
976         }
977 
978         /* The number of prefix chars is the same as the leading
979            chars to skip */
980         if (format->alternate)
981             n_prefix = leading_chars_to_skip;
982 
983         /* Do the hard part, converting to a string in a given base */
984         tmp = _PyLong_Format(value, base);
985         if (tmp == NULL || PyUnicode_READY(tmp) == -1)
986             goto done;
987 
988         inumeric_chars = 0;
989         n_digits = PyUnicode_GET_LENGTH(tmp);
990 
991         prefix = inumeric_chars;
992 
993         /* Is a sign character present in the output?  If so, remember it
994            and skip it */
995         if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
996             sign_char = '-';
997             ++prefix;
998             ++leading_chars_to_skip;
999         }
1000 
1001         /* Skip over the leading chars (0x, 0b, etc.) */
1002         n_digits -= leading_chars_to_skip;
1003         inumeric_chars += leading_chars_to_skip;
1004     }
1005 
1006     /* Determine the grouping, separator, and decimal point, if any. */
1007     if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1008                         format->thousands_separators,
1009                         &locale) == -1)
1010         goto done;
1011 
1012     /* Calculate how much memory we'll need. */
1013     n_total = calc_number_widths(&spec, n_prefix, sign_char, inumeric_chars,
1014                                  inumeric_chars + n_digits, n_remainder, 0,
1015                                  &locale, format, &maxchar);
1016     if (n_total == -1) {
1017         goto done;
1018     }
1019 
1020     /* Allocate the memory. */
1021     if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
1022         goto done;
1023 
1024     /* Populate the memory. */
1025     result = fill_number(writer, &spec,
1026                          tmp, inumeric_chars,
1027                          tmp, prefix, format->fill_char,
1028                          &locale, format->type == 'X');
1029 
1030 done:
1031     Py_XDECREF(tmp);
1032     free_locale_info(&locale);
1033     return result;
1034 }
1035 
1036 /************************************************************************/
1037 /*********** float formatting *******************************************/
1038 /************************************************************************/
1039 
1040 /* much of this is taken from unicodeobject.c */
1041 static int
format_float_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)1042 format_float_internal(PyObject *value,
1043                       const InternalFormatSpec *format,
1044                       _PyUnicodeWriter *writer)
1045 {
1046     char *buf = NULL;       /* buffer returned from PyOS_double_to_string */
1047     Py_ssize_t n_digits;
1048     Py_ssize_t n_remainder;
1049     Py_ssize_t n_total;
1050     int has_decimal;
1051     double val;
1052     int precision, default_precision = 6;
1053     Py_UCS4 type = format->type;
1054     int add_pct = 0;
1055     Py_ssize_t index;
1056     NumberFieldWidths spec;
1057     int flags = 0;
1058     int result = -1;
1059     Py_UCS4 maxchar = 127;
1060     Py_UCS4 sign_char = '\0';
1061     int float_type; /* Used to see if we have a nan, inf, or regular float. */
1062     PyObject *unicode_tmp = NULL;
1063 
1064     /* Locale settings, either from the actual locale or
1065        from a hard-code pseudo-locale */
1066     LocaleInfo locale = LocaleInfo_STATIC_INIT;
1067 
1068     if (format->precision > INT_MAX) {
1069         PyErr_SetString(PyExc_ValueError, "precision too big");
1070         goto done;
1071     }
1072     precision = (int)format->precision;
1073 
1074     if (format->alternate)
1075         flags |= Py_DTSF_ALT;
1076     if (format->no_neg_0)
1077         flags |= Py_DTSF_NO_NEG_0;
1078 
1079     if (type == '\0') {
1080         /* Omitted type specifier.  Behaves in the same way as repr(x)
1081            and str(x) if no precision is given, else like 'g', but with
1082            at least one digit after the decimal point. */
1083         flags |= Py_DTSF_ADD_DOT_0;
1084         type = 'r';
1085         default_precision = 0;
1086     }
1087 
1088     if (type == 'n')
1089         /* 'n' is the same as 'g', except for the locale used to
1090            format the result. We take care of that later. */
1091         type = 'g';
1092 
1093     val = PyFloat_AsDouble(value);
1094     if (val == -1.0 && PyErr_Occurred())
1095         goto done;
1096 
1097     if (type == '%') {
1098         type = 'f';
1099         val *= 100;
1100         add_pct = 1;
1101     }
1102 
1103     if (precision < 0)
1104         precision = default_precision;
1105     else if (type == 'r')
1106         type = 'g';
1107 
1108     /* Cast "type", because if we're in unicode we need to pass an
1109        8-bit char. This is safe, because we've restricted what "type"
1110        can be. */
1111     buf = PyOS_double_to_string(val, (char)type, precision, flags,
1112                                 &float_type);
1113     if (buf == NULL)
1114         goto done;
1115     n_digits = strlen(buf);
1116 
1117     if (add_pct) {
1118         /* We know that buf has a trailing zero (since we just called
1119            strlen() on it), and we don't use that fact any more. So we
1120            can just write over the trailing zero. */
1121         buf[n_digits] = '%';
1122         n_digits += 1;
1123     }
1124 
1125     if (format->sign != '+' && format->sign != ' '
1126         && format->width == -1
1127         && format->type != 'n'
1128         && !format->thousands_separators)
1129     {
1130         /* Fast path */
1131         result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
1132         PyMem_Free(buf);
1133         return result;
1134     }
1135 
1136     /* Since there is no unicode version of PyOS_double_to_string,
1137        just use the 8 bit version and then convert to unicode. */
1138     unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
1139     PyMem_Free(buf);
1140     if (unicode_tmp == NULL)
1141         goto done;
1142 
1143     /* Is a sign character present in the output?  If so, remember it
1144        and skip it */
1145     index = 0;
1146     if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1147         sign_char = '-';
1148         ++index;
1149         --n_digits;
1150     }
1151 
1152     /* Determine if we have any "remainder" (after the digits, might include
1153        decimal or exponent or both (or neither)) */
1154     parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1155 
1156     /* Determine the grouping, separator, and decimal point, if any. */
1157     if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1158                         format->thousands_separators,
1159                         &locale) == -1)
1160         goto done;
1161 
1162     /* Calculate how much memory we'll need. */
1163     n_total = calc_number_widths(&spec, 0, sign_char, index,
1164                                  index + n_digits, n_remainder, has_decimal,
1165                                  &locale, format, &maxchar);
1166     if (n_total == -1) {
1167         goto done;
1168     }
1169 
1170     /* Allocate the memory. */
1171     if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
1172         goto done;
1173 
1174     /* Populate the memory. */
1175     result = fill_number(writer, &spec,
1176                          unicode_tmp, index,
1177                          NULL, 0, format->fill_char,
1178                          &locale, 0);
1179 
1180 done:
1181     Py_XDECREF(unicode_tmp);
1182     free_locale_info(&locale);
1183     return result;
1184 }
1185 
1186 /************************************************************************/
1187 /*********** complex formatting *****************************************/
1188 /************************************************************************/
1189 
/* Format the complex number `value` according to the already-parsed
 * `format` and append the text to `writer`.
 *
 * The real and imaginary parts are each converted with
 * PyOS_double_to_string() and laid out without any padding; padding for
 * the whole "<re><im>j" (optionally parenthesised) text is computed and
 * written separately.
 *
 * Zero padding (fill character '0') and '=' alignment are rejected with
 * ValueError, as is a precision larger than INT_MAX.
 *
 * Returns -1 on failure.  Otherwise returns the value reported by the
 * last fill_number() call.
 */
static int
format_complex_internal(PyObject *value,
                        const InternalFormatSpec *format,
                        _PyUnicodeWriter *writer)
{
    double re;
    double im;
    char *re_buf = NULL;       /* buffer returned from PyOS_double_to_string */
    char *im_buf = NULL;       /* buffer returned from PyOS_double_to_string */

    /* Private, modifiable copy of the caller's spec; used to lay out each
       part without padding and to force a sign on the imaginary part. */
    InternalFormatSpec tmp_format = *format;
    Py_ssize_t n_re_digits;
    Py_ssize_t n_im_digits;
    Py_ssize_t n_re_remainder;
    Py_ssize_t n_im_remainder;
    Py_ssize_t n_re_total;
    Py_ssize_t n_im_total;
    int re_has_decimal;
    int im_has_decimal;
    int precision, default_precision = 6;
    Py_UCS4 type = format->type;
    Py_ssize_t i_re;           /* index of first non-sign char of real part */
    Py_ssize_t i_im;           /* index of first non-sign char of imag part */
    NumberFieldWidths re_spec;
    NumberFieldWidths im_spec;
    int flags = 0;
    int result = -1;
    Py_UCS4 maxchar = 127;
    enum PyUnicode_Kind rkind;
    void *rdata;
    Py_UCS4 re_sign_char = '\0';
    Py_UCS4 im_sign_char = '\0';
    int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
    int im_float_type;
    int add_parens = 0;        /* wrap the output in "(" ... ")" */
    int skip_re = 0;           /* omit the real part from the output */
    Py_ssize_t lpad;
    Py_ssize_t rpad;
    Py_ssize_t total;
    PyObject *re_unicode_tmp = NULL;
    PyObject *im_unicode_tmp = NULL;

    /* Locale settings, either from the actual locale or
       from a hard-code pseudo-locale */
    LocaleInfo locale = LocaleInfo_STATIC_INIT;

    if (format->precision > INT_MAX) {
        PyErr_SetString(PyExc_ValueError, "precision too big");
        goto done;
    }
    precision = (int)format->precision;

    /* Zero padding is not allowed. */
    if (format->fill_char == '0') {
        PyErr_SetString(PyExc_ValueError,
                        "Zero padding is not allowed in complex format "
                        "specifier");
        goto done;
    }

    /* Neither is '=' alignment. */
    if (format->align == '=') {
        PyErr_SetString(PyExc_ValueError,
                        "'=' alignment flag is not allowed in complex format "
                        "specifier");
        goto done;
    }

    re = PyComplex_RealAsDouble(value);
    if (re == -1.0 && PyErr_Occurred())
        goto done;
    im = PyComplex_ImagAsDouble(value);
    if (im == -1.0 && PyErr_Occurred())
        goto done;

    if (format->alternate)
        flags |= Py_DTSF_ALT;
    if (format->no_neg_0)
        flags |= Py_DTSF_NO_NEG_0;

    if (type == '\0') {
        /* Omitted type specifier. Should be like str(self): a real part
           that is positive zero is dropped entirely; in every other case
           both parts are shown and wrapped in parentheses. */
        type = 'r';
        default_precision = 0;
        if (re == 0.0 && copysign(1.0, re) == 1.0)
            skip_re = 1;
        else
            add_parens = 1;
    }

    if (type == 'n')
        /* 'n' is the same as 'g', except for the locale used to
           format the result. We take care of that later. */
        type = 'g';

    /* No precision given: use the default for this type.  An explicit
       precision turns the repr-style 'r' into 'g'. */
    if (precision < 0)
        precision = default_precision;
    else if (type == 'r')
        type = 'g';

    /* Cast "type", because if we're in unicode we need to pass an
       8-bit char. This is safe, because we've restricted what "type"
       can be. */
    re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
                                   &re_float_type);
    if (re_buf == NULL)
        goto done;
    im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
                                   &im_float_type);
    if (im_buf == NULL)
        goto done;

    n_re_digits = strlen(re_buf);
    n_im_digits = strlen(im_buf);

    /* Since there is no unicode version of PyOS_double_to_string,
       just use the 8 bit version and then convert to unicode. */
    re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
    if (re_unicode_tmp == NULL)
        goto done;
    i_re = 0;

    im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
    if (im_unicode_tmp == NULL)
        goto done;
    i_im = 0;

    /* Is a sign character present in the output?  If so, remember it
       and skip it */
    if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
        re_sign_char = '-';
        ++i_re;
        --n_re_digits;
    }
    if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
        im_sign_char = '-';
        ++i_im;
        --n_im_digits;
    }

    /* Determine if we have any "remainder" (after the digits, might include
       decimal or exponent or both (or neither)) */
    parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
                 &n_re_remainder, &re_has_decimal);
    parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
                 &n_im_remainder, &im_has_decimal);

    /* Determine the grouping, separator, and decimal point, if any. */
    if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
                        format->thousands_separators,
                        &locale) == -1)
        goto done;

    /* Turn off any padding. We'll do it later after we've composed
       the numbers without padding. */
    tmp_format.fill_char = '\0';
    tmp_format.align = '<';
    tmp_format.width = -1;

    /* Calculate how much memory we'll need. */
    n_re_total = calc_number_widths(&re_spec, 0, re_sign_char,
                                    i_re, i_re + n_re_digits, n_re_remainder,
                                    re_has_decimal, &locale, &tmp_format,
                                    &maxchar);
    if (n_re_total == -1) {
        goto done;
    }

    /* Same formatting, but always include a sign, unless the real part is
     * going to be omitted, in which case we use whatever sign convention was
     * requested by the original format.  Note that this mutates tmp_format,
     * so it must stay after the real part's widths have been computed. */
    if (!skip_re)
        tmp_format.sign = '+';
    n_im_total = calc_number_widths(&im_spec, 0, im_sign_char,
                                    i_im, i_im + n_im_digits, n_im_remainder,
                                    im_has_decimal, &locale, &tmp_format,
                                    &maxchar);
    if (n_im_total == -1) {
        goto done;
    }

    /* An omitted real part contributes nothing to the output width. */
    if (skip_re)
        n_re_total = 0;

    /* Add 1 for the 'j', and optionally 2 for parens. */
    calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
                 format->width, format->align, &lpad, &rpad, &total);

    /* The fill character only matters if padding is actually written. */
    if (lpad || rpad)
        maxchar = Py_MAX(maxchar, format->fill_char);

    if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
        goto done;
    /* Read the writer's kind and data only after the prepare call above. */
    rkind = writer->kind;
    rdata = writer->data;

    /* Populate the memory. First, the padding. */
    result = fill_padding(writer,
                          n_re_total + n_im_total + 1 + add_parens * 2,
                          format->fill_char, lpad, rpad);
    if (result == -1)
        goto done;

    if (add_parens) {
        PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
        writer->pos++;
    }

    /* Real part (unless omitted), then imaginary part; both are written
       with a fill character of 0 because padding was handled above. */
    if (!skip_re) {
        result = fill_number(writer, &re_spec,
                             re_unicode_tmp, i_re,
                             NULL, 0,
                             0,
                             &locale, 0);
        if (result == -1)
            goto done;
    }
    result = fill_number(writer, &im_spec,
                         im_unicode_tmp, i_im,
                         NULL, 0,
                         0,
                         &locale, 0);
    if (result == -1)
        goto done;
    PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
    writer->pos++;

    if (add_parens) {
        PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
        writer->pos++;
    }

    /* Step over the right-hand padding. */
    writer->pos += rpad;

done:
    PyMem_Free(re_buf);
    PyMem_Free(im_buf);
    Py_XDECREF(re_unicode_tmp);
    Py_XDECREF(im_unicode_tmp);
    free_locale_info(&locale);
    return result;
}
1432 
1433 /************************************************************************/
1434 /*********** built in formatters ****************************************/
1435 /************************************************************************/
1436 static int
format_obj(PyObject * obj,_PyUnicodeWriter * writer)1437 format_obj(PyObject *obj, _PyUnicodeWriter *writer)
1438 {
1439     PyObject *str;
1440     int err;
1441 
1442     str = PyObject_Str(obj);
1443     if (str == NULL)
1444         return -1;
1445     err = _PyUnicodeWriter_WriteStr(writer, str);
1446     Py_DECREF(str);
1447     return err;
1448 }
1449 
1450 int
_PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1451 _PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1452                                 PyObject *obj,
1453                                 PyObject *format_spec,
1454                                 Py_ssize_t start, Py_ssize_t end)
1455 {
1456     InternalFormatSpec format;
1457 
1458     assert(PyUnicode_Check(obj));
1459 
1460     /* check for the special case of zero length format spec, make
1461        it equivalent to str(obj) */
1462     if (start == end) {
1463         if (PyUnicode_CheckExact(obj))
1464             return _PyUnicodeWriter_WriteStr(writer, obj);
1465         else
1466             return format_obj(obj, writer);
1467     }
1468 
1469     /* parse the format_spec */
1470     if (!parse_internal_render_format_spec(obj, format_spec, start, end,
1471                                            &format, 's', '<'))
1472         return -1;
1473 
1474     /* type conversion? */
1475     switch (format.type) {
1476     case 's':
1477         /* no type conversion needed, already a string.  do the formatting */
1478         return format_string_internal(obj, &format, writer);
1479     default:
1480         /* unknown */
1481         unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1482         return -1;
1483     }
1484 }
1485 
1486 int
_PyLong_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1487 _PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1488                              PyObject *obj,
1489                              PyObject *format_spec,
1490                              Py_ssize_t start, Py_ssize_t end)
1491 {
1492     PyObject *tmp = NULL;
1493     InternalFormatSpec format;
1494     int result = -1;
1495 
1496     /* check for the special case of zero length format spec, make
1497        it equivalent to str(obj) */
1498     if (start == end) {
1499         if (PyLong_CheckExact(obj))
1500             return _PyLong_FormatWriter(writer, obj, 10, 0);
1501         else
1502             return format_obj(obj, writer);
1503     }
1504 
1505     /* parse the format_spec */
1506     if (!parse_internal_render_format_spec(obj, format_spec, start, end,
1507                                            &format, 'd', '>'))
1508         goto done;
1509 
1510     /* type conversion? */
1511     switch (format.type) {
1512     case 'b':
1513     case 'c':
1514     case 'd':
1515     case 'o':
1516     case 'x':
1517     case 'X':
1518     case 'n':
1519         /* no type conversion needed, already an int.  do the formatting */
1520         result = format_long_internal(obj, &format, writer);
1521         break;
1522 
1523     case 'e':
1524     case 'E':
1525     case 'f':
1526     case 'F':
1527     case 'g':
1528     case 'G':
1529     case '%':
1530         /* convert to float */
1531         tmp = PyNumber_Float(obj);
1532         if (tmp == NULL)
1533             goto done;
1534         result = format_float_internal(tmp, &format, writer);
1535         break;
1536 
1537     default:
1538         /* unknown */
1539         unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1540         goto done;
1541     }
1542 
1543 done:
1544     Py_XDECREF(tmp);
1545     return result;
1546 }
1547 
1548 int
_PyFloat_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1549 _PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1550                               PyObject *obj,
1551                               PyObject *format_spec,
1552                               Py_ssize_t start, Py_ssize_t end)
1553 {
1554     InternalFormatSpec format;
1555 
1556     /* check for the special case of zero length format spec, make
1557        it equivalent to str(obj) */
1558     if (start == end)
1559         return format_obj(obj, writer);
1560 
1561     /* parse the format_spec */
1562     if (!parse_internal_render_format_spec(obj, format_spec, start, end,
1563                                            &format, '\0', '>'))
1564         return -1;
1565 
1566     /* type conversion? */
1567     switch (format.type) {
1568     case '\0': /* No format code: like 'g', but with at least one decimal. */
1569     case 'e':
1570     case 'E':
1571     case 'f':
1572     case 'F':
1573     case 'g':
1574     case 'G':
1575     case 'n':
1576     case '%':
1577         /* no conversion, already a float.  do the formatting */
1578         return format_float_internal(obj, &format, writer);
1579 
1580     default:
1581         /* unknown */
1582         unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1583         return -1;
1584     }
1585 }
1586 
1587 int
_PyComplex_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1588 _PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1589                                 PyObject *obj,
1590                                 PyObject *format_spec,
1591                                 Py_ssize_t start, Py_ssize_t end)
1592 {
1593     InternalFormatSpec format;
1594 
1595     /* check for the special case of zero length format spec, make
1596        it equivalent to str(obj) */
1597     if (start == end)
1598         return format_obj(obj, writer);
1599 
1600     /* parse the format_spec */
1601     if (!parse_internal_render_format_spec(obj, format_spec, start, end,
1602                                            &format, '\0', '>'))
1603         return -1;
1604 
1605     /* type conversion? */
1606     switch (format.type) {
1607     case '\0': /* No format code: like 'g', but with at least one decimal. */
1608     case 'e':
1609     case 'E':
1610     case 'f':
1611     case 'F':
1612     case 'g':
1613     case 'G':
1614     case 'n':
1615         /* no conversion, already a complex.  do the formatting */
1616         return format_complex_internal(obj, &format, writer);
1617 
1618     default:
1619         /* unknown */
1620         unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1621         return -1;
1622     }
1623 }
1624