/* implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  that is, the versions
   of int.__format__, etc., that take and return unicode objects */
4
5 #include "Python.h"
6 #include "pycore_fileutils.h" // _Py_GetLocaleconvNumeric()
7 #include "pycore_long.h" // _PyLong_FormatWriter()
8 #include <locale.h>
9
10 /* Raises an exception about an unknown presentation type for this
11 * type. */
12
13 static void
unknown_presentation_type(Py_UCS4 presentation_type,const char * type_name)14 unknown_presentation_type(Py_UCS4 presentation_type,
15 const char* type_name)
16 {
17 /* %c might be out-of-range, hence the two cases. */
18 if (presentation_type > 32 && presentation_type < 128)
19 PyErr_Format(PyExc_ValueError,
20 "Unknown format code '%c' "
21 "for object of type '%.200s'",
22 (char)presentation_type,
23 type_name);
24 else
25 PyErr_Format(PyExc_ValueError,
26 "Unknown format code '\\x%x' "
27 "for object of type '%.200s'",
28 (unsigned int)presentation_type,
29 type_name);
30 }
31
32 static void
invalid_thousands_separator_type(char specifier,Py_UCS4 presentation_type)33 invalid_thousands_separator_type(char specifier, Py_UCS4 presentation_type)
34 {
35 assert(specifier == ',' || specifier == '_');
36 if (presentation_type > 32 && presentation_type < 128)
37 PyErr_Format(PyExc_ValueError,
38 "Cannot specify '%c' with '%c'.",
39 specifier, (char)presentation_type);
40 else
41 PyErr_Format(PyExc_ValueError,
42 "Cannot specify '%c' with '\\x%x'.",
43 specifier, (unsigned int)presentation_type);
44 }
45
46 static void
invalid_comma_and_underscore(void)47 invalid_comma_and_underscore(void)
48 {
49 PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
50 }
51
52 /*
53 get_integer consumes 0 or more decimal digit characters from an
54 input string, updates *result with the corresponding positive
55 integer, and returns the number of digits consumed.
56
57 returns -1 on error.
58 */
59 static int
get_integer(PyObject * str,Py_ssize_t * ppos,Py_ssize_t end,Py_ssize_t * result)60 get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end,
61 Py_ssize_t *result)
62 {
63 Py_ssize_t accumulator, digitval, pos = *ppos;
64 int numdigits;
65 int kind = PyUnicode_KIND(str);
66 const void *data = PyUnicode_DATA(str);
67
68 accumulator = numdigits = 0;
69 for (; pos < end; pos++, numdigits++) {
70 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos));
71 if (digitval < 0)
72 break;
73 /*
74 Detect possible overflow before it happens:
75
76 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
77 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
78 */
79 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
80 PyErr_Format(PyExc_ValueError,
81 "Too many decimal digits in format string");
82 *ppos = pos;
83 return -1;
84 }
85 accumulator = accumulator * 10 + digitval;
86 }
87 *ppos = pos;
88 *result = accumulator;
89 return numdigits;
90 }
91
92 /************************************************************************/
93 /*********** standard format specifier parsing **************************/
94 /************************************************************************/
95
96 /* returns true if this character is a specifier alignment token */
97 Py_LOCAL_INLINE(int)
is_alignment_token(Py_UCS4 c)98 is_alignment_token(Py_UCS4 c)
99 {
100 switch (c) {
101 case '<': case '>': case '=': case '^':
102 return 1;
103 default:
104 return 0;
105 }
106 }
107
108 /* returns true if this character is a sign element */
109 Py_LOCAL_INLINE(int)
is_sign_element(Py_UCS4 c)110 is_sign_element(Py_UCS4 c)
111 {
112 switch (c) {
113 case ' ': case '+': case '-':
114 return 1;
115 default:
116 return 0;
117 }
118 }
119
/* Locale type codes, selecting where the decimal point, thousands
   separator and grouping come from (see get_locale_info()).

   LT_NO_LOCALE must be zero: the format spec parser tests the value for
   truth to decide whether grouping was requested.  LT_DEFAULT_LOCALE
   and LT_UNDERSCORE_LOCALE are deliberately equal to the character that
   requests them, because the value is also passed on as that character
   when reporting an invalid combination. */
enum LocaleType {
    /* No grouping requested. */
    LT_NO_LOCALE = 0,
    /* ',' separator, groups of three digits. */
    LT_DEFAULT_LOCALE = ',',
    /* '_' separator, groups of three digits. */
    LT_UNDERSCORE_LOCALE = '_',
    /* '_' separator, groups of four digits (bin/oct/hex). */
    LT_UNDER_FOUR_LOCALE,
    /* Settings taken from localeconv(). */
    LT_CURRENT_LOCALE
};
128
129 typedef struct {
130 Py_UCS4 fill_char;
131 Py_UCS4 align;
132 int alternate;
133 int no_neg_0;
134 Py_UCS4 sign;
135 Py_ssize_t width;
136 enum LocaleType thousands_separators;
137 Py_ssize_t precision;
138 Py_UCS4 type;
139 } InternalFormatSpec;
140
141
142 /*
143 ptr points to the start of the format_spec, end points just past its end.
144 fills in format with the parsed information.
145 returns 1 on success, 0 on failure.
146 if failure, sets the exception
147 */
148 static int
parse_internal_render_format_spec(PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end,InternalFormatSpec * format,char default_type,char default_align)149 parse_internal_render_format_spec(PyObject *obj,
150 PyObject *format_spec,
151 Py_ssize_t start, Py_ssize_t end,
152 InternalFormatSpec *format,
153 char default_type,
154 char default_align)
155 {
156 Py_ssize_t pos = start;
157 int kind = PyUnicode_KIND(format_spec);
158 const void *data = PyUnicode_DATA(format_spec);
159 /* end-pos is used throughout this code to specify the length of
160 the input string */
161 #define READ_spec(index) PyUnicode_READ(kind, data, index)
162
163 Py_ssize_t consumed;
164 int align_specified = 0;
165 int fill_char_specified = 0;
166
167 format->fill_char = ' ';
168 format->align = default_align;
169 format->alternate = 0;
170 format->no_neg_0 = 0;
171 format->sign = '\0';
172 format->width = -1;
173 format->thousands_separators = LT_NO_LOCALE;
174 format->precision = -1;
175 format->type = default_type;
176
177 /* If the second char is an alignment token,
178 then parse the fill char */
179 if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
180 format->align = READ_spec(pos+1);
181 format->fill_char = READ_spec(pos);
182 fill_char_specified = 1;
183 align_specified = 1;
184 pos += 2;
185 }
186 else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
187 format->align = READ_spec(pos);
188 align_specified = 1;
189 ++pos;
190 }
191
192 /* Parse the various sign options */
193 if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
194 format->sign = READ_spec(pos);
195 ++pos;
196 }
197
198 /* If the next character is z, request coercion of negative 0.
199 Applies only to floats. */
200 if (end-pos >= 1 && READ_spec(pos) == 'z') {
201 format->no_neg_0 = 1;
202 ++pos;
203 }
204
205 /* If the next character is #, we're in alternate mode. This only
206 applies to integers. */
207 if (end-pos >= 1 && READ_spec(pos) == '#') {
208 format->alternate = 1;
209 ++pos;
210 }
211
212 /* The special case for 0-padding (backwards compat) */
213 if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') {
214 format->fill_char = '0';
215 if (!align_specified && default_align == '>') {
216 format->align = '=';
217 }
218 ++pos;
219 }
220
221 consumed = get_integer(format_spec, &pos, end, &format->width);
222 if (consumed == -1)
223 /* Overflow error. Exception already set. */
224 return 0;
225
226 /* If consumed is 0, we didn't consume any characters for the
227 width. In that case, reset the width to -1, because
228 get_integer() will have set it to zero. -1 is how we record
229 that the width wasn't specified. */
230 if (consumed == 0)
231 format->width = -1;
232
233 /* Comma signifies add thousands separators */
234 if (end-pos && READ_spec(pos) == ',') {
235 format->thousands_separators = LT_DEFAULT_LOCALE;
236 ++pos;
237 }
238 /* Underscore signifies add thousands separators */
239 if (end-pos && READ_spec(pos) == '_') {
240 if (format->thousands_separators != LT_NO_LOCALE) {
241 invalid_comma_and_underscore();
242 return 0;
243 }
244 format->thousands_separators = LT_UNDERSCORE_LOCALE;
245 ++pos;
246 }
247 if (end-pos && READ_spec(pos) == ',') {
248 if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
249 invalid_comma_and_underscore();
250 return 0;
251 }
252 }
253
254 /* Parse field precision */
255 if (end-pos && READ_spec(pos) == '.') {
256 ++pos;
257
258 consumed = get_integer(format_spec, &pos, end, &format->precision);
259 if (consumed == -1)
260 /* Overflow error. Exception already set. */
261 return 0;
262
263 /* Not having a precision after a dot is an error. */
264 if (consumed == 0) {
265 PyErr_Format(PyExc_ValueError,
266 "Format specifier missing precision");
267 return 0;
268 }
269
270 }
271
272 /* Finally, parse the type field. */
273
274 if (end-pos > 1) {
275 /* More than one char remains, so this is an invalid format
276 specifier. */
277 /* Create a temporary object that contains the format spec we're
278 operating on. It's format_spec[start:end] (in Python syntax). */
279 PyObject* actual_format_spec = PyUnicode_FromKindAndData(kind,
280 (char*)data + kind*start,
281 end-start);
282 if (actual_format_spec != NULL) {
283 PyErr_Format(PyExc_ValueError,
284 "Invalid format specifier '%U' for object of type '%.200s'",
285 actual_format_spec, Py_TYPE(obj)->tp_name);
286 Py_DECREF(actual_format_spec);
287 }
288 return 0;
289 }
290
291 if (end-pos == 1) {
292 format->type = READ_spec(pos);
293 ++pos;
294 }
295
296 /* Do as much validating as we can, just by looking at the format
297 specifier. Do not take into account what type of formatting
298 we're doing (int, float, string). */
299
300 if (format->thousands_separators) {
301 switch (format->type) {
302 case 'd':
303 case 'e':
304 case 'f':
305 case 'g':
306 case 'E':
307 case 'G':
308 case '%':
309 case 'F':
310 case '\0':
311 /* These are allowed. See PEP 378.*/
312 break;
313 case 'b':
314 case 'o':
315 case 'x':
316 case 'X':
317 /* Underscores are allowed in bin/oct/hex. See PEP 515. */
318 if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
319 /* Every four digits, not every three, in bin/oct/hex. */
320 format->thousands_separators = LT_UNDER_FOUR_LOCALE;
321 break;
322 }
323 /* fall through */
324 default:
325 invalid_thousands_separator_type(format->thousands_separators, format->type);
326 return 0;
327 }
328 }
329
330 assert (format->align <= 127);
331 assert (format->sign <= 127);
332 return 1;
333 }
334
335 /* Calculate the padding needed. */
336 static void
calc_padding(Py_ssize_t nchars,Py_ssize_t width,Py_UCS4 align,Py_ssize_t * n_lpadding,Py_ssize_t * n_rpadding,Py_ssize_t * n_total)337 calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
338 Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
339 Py_ssize_t *n_total)
340 {
341 if (width >= 0) {
342 if (nchars > width)
343 *n_total = nchars;
344 else
345 *n_total = width;
346 }
347 else {
348 /* not specified, use all of the chars and no more */
349 *n_total = nchars;
350 }
351
352 /* Figure out how much leading space we need, based on the
353 aligning */
354 if (align == '>')
355 *n_lpadding = *n_total - nchars;
356 else if (align == '^')
357 *n_lpadding = (*n_total - nchars) / 2;
358 else if (align == '<' || align == '=')
359 *n_lpadding = 0;
360 else {
361 /* We should never have an unspecified alignment. */
362 Py_UNREACHABLE();
363 }
364
365 *n_rpadding = *n_total - nchars - *n_lpadding;
366 }
367
368 /* Do the padding, and return a pointer to where the caller-supplied
369 content goes. */
370 static int
fill_padding(_PyUnicodeWriter * writer,Py_ssize_t nchars,Py_UCS4 fill_char,Py_ssize_t n_lpadding,Py_ssize_t n_rpadding)371 fill_padding(_PyUnicodeWriter *writer,
372 Py_ssize_t nchars,
373 Py_UCS4 fill_char, Py_ssize_t n_lpadding,
374 Py_ssize_t n_rpadding)
375 {
376 Py_ssize_t pos;
377
378 /* Pad on left. */
379 if (n_lpadding) {
380 pos = writer->pos;
381 _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
382 }
383
384 /* Pad on right. */
385 if (n_rpadding) {
386 pos = writer->pos + nchars + n_lpadding;
387 _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
388 }
389
390 /* Pointer to the user content. */
391 writer->pos += n_lpadding;
392 return 0;
393 }
394
395 /************************************************************************/
396 /*********** common routines for numeric formatting *********************/
397 /************************************************************************/
398
399 /* Locale info needed for formatting integers and the part of floats
400 before and including the decimal. Note that locales only support
401 8-bit chars, not unicode. */
402 typedef struct {
403 PyObject *decimal_point;
404 PyObject *thousands_sep;
405 const char *grouping;
406 char *grouping_buffer;
407 } LocaleInfo;
408
409 #define LocaleInfo_STATIC_INIT {0, 0, 0, 0}
410
411 /* describes the layout for an integer, see the comment in
412 calc_number_widths() for details */
413 typedef struct {
414 Py_ssize_t n_lpadding;
415 Py_ssize_t n_prefix;
416 Py_ssize_t n_spadding;
417 Py_ssize_t n_rpadding;
418 char sign;
419 Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
420 Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
421 any grouping chars. */
422 Py_ssize_t n_decimal; /* 0 if only an integer */
423 Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
424 excluding the decimal itself, if
425 present. */
426
427 /* These 2 are not the widths of fields, but are needed by
428 STRINGLIB_GROUPING. */
429 Py_ssize_t n_digits; /* The number of digits before a decimal
430 or exponent. */
431 Py_ssize_t n_min_width; /* The min_width we used when we computed
432 the n_grouped_digits width. */
433 } NumberFieldWidths;
434
435
436 /* Given a number of the form:
437 digits[remainder]
438 where ptr points to the start and end points to the end, find where
439 the integer part ends. This could be a decimal, an exponent, both,
440 or neither.
441 If a decimal point is present, set *has_decimal and increment
442 remainder beyond it.
443 Results are undefined (but shouldn't crash) for improperly
444 formatted strings.
445 */
446 static void
parse_number(PyObject * s,Py_ssize_t pos,Py_ssize_t end,Py_ssize_t * n_remainder,int * has_decimal)447 parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
448 Py_ssize_t *n_remainder, int *has_decimal)
449 {
450 Py_ssize_t remainder;
451 int kind = PyUnicode_KIND(s);
452 const void *data = PyUnicode_DATA(s);
453
454 while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))
455 ++pos;
456 remainder = pos;
457
458 /* Does remainder start with a decimal point? */
459 *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';
460
461 /* Skip the decimal point. */
462 if (*has_decimal)
463 remainder++;
464
465 *n_remainder = end - remainder;
466 }
467
468 /* not all fields of format are used. for example, precision is
469 unused. should this take discrete params in order to be more clear
470 about what it does? or is passing a single format parameter easier
471 and more efficient enough to justify a little obfuscation?
472 Return -1 on error. */
473 static Py_ssize_t
calc_number_widths(NumberFieldWidths * spec,Py_ssize_t n_prefix,Py_UCS4 sign_char,Py_ssize_t n_start,Py_ssize_t n_end,Py_ssize_t n_remainder,int has_decimal,const LocaleInfo * locale,const InternalFormatSpec * format,Py_UCS4 * maxchar)474 calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
475 Py_UCS4 sign_char, Py_ssize_t n_start,
476 Py_ssize_t n_end, Py_ssize_t n_remainder,
477 int has_decimal, const LocaleInfo *locale,
478 const InternalFormatSpec *format, Py_UCS4 *maxchar)
479 {
480 Py_ssize_t n_non_digit_non_padding;
481 Py_ssize_t n_padding;
482
483 spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
484 spec->n_lpadding = 0;
485 spec->n_prefix = n_prefix;
486 spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
487 spec->n_remainder = n_remainder;
488 spec->n_spadding = 0;
489 spec->n_rpadding = 0;
490 spec->sign = '\0';
491 spec->n_sign = 0;
492
493 /* the output will look like:
494 | |
495 | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
496 | |
497
498 sign is computed from format->sign and the actual
499 sign of the number
500
501 prefix is given (it's for the '0x' prefix)
502
503 digits is already known
504
505 the total width is either given, or computed from the
506 actual digits
507
508 only one of lpadding, spadding, and rpadding can be non-zero,
509 and it's calculated from the width and other fields
510 */
511
512 /* compute the various parts we're going to write */
513 switch (format->sign) {
514 case '+':
515 /* always put a + or - */
516 spec->n_sign = 1;
517 spec->sign = (sign_char == '-' ? '-' : '+');
518 break;
519 case ' ':
520 spec->n_sign = 1;
521 spec->sign = (sign_char == '-' ? '-' : ' ');
522 break;
523 default:
524 /* Not specified, or the default (-) */
525 if (sign_char == '-') {
526 spec->n_sign = 1;
527 spec->sign = '-';
528 }
529 }
530
531 /* The number of chars used for non-digits and non-padding. */
532 n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
533 spec->n_remainder;
534
535 /* min_width can go negative, that's okay. format->width == -1 means
536 we don't care. */
537 if (format->fill_char == '0' && format->align == '=')
538 spec->n_min_width = format->width - n_non_digit_non_padding;
539 else
540 spec->n_min_width = 0;
541
542 if (spec->n_digits == 0)
543 /* This case only occurs when using 'c' formatting, we need
544 to special case it because the grouping code always wants
545 to have at least one character. */
546 spec->n_grouped_digits = 0;
547 else {
548 Py_UCS4 grouping_maxchar;
549 spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
550 NULL, 0,
551 NULL, 0, spec->n_digits,
552 spec->n_min_width,
553 locale->grouping, locale->thousands_sep, &grouping_maxchar);
554 if (spec->n_grouped_digits == -1) {
555 return -1;
556 }
557 *maxchar = Py_MAX(*maxchar, grouping_maxchar);
558 }
559
560 /* Given the desired width and the total of digit and non-digit
561 space we consume, see if we need any padding. format->width can
562 be negative (meaning no padding), but this code still works in
563 that case. */
564 n_padding = format->width -
565 (n_non_digit_non_padding + spec->n_grouped_digits);
566 if (n_padding > 0) {
567 /* Some padding is needed. Determine if it's left, space, or right. */
568 switch (format->align) {
569 case '<':
570 spec->n_rpadding = n_padding;
571 break;
572 case '^':
573 spec->n_lpadding = n_padding / 2;
574 spec->n_rpadding = n_padding - spec->n_lpadding;
575 break;
576 case '=':
577 spec->n_spadding = n_padding;
578 break;
579 case '>':
580 spec->n_lpadding = n_padding;
581 break;
582 default:
583 /* Shouldn't get here */
584 Py_UNREACHABLE();
585 }
586 }
587
588 if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
589 *maxchar = Py_MAX(*maxchar, format->fill_char);
590
591 if (spec->n_decimal)
592 *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
593
594 return spec->n_lpadding + spec->n_sign + spec->n_prefix +
595 spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
596 spec->n_remainder + spec->n_rpadding;
597 }
598
599 /* Fill in the digit parts of a number's string representation,
600 as determined in calc_number_widths().
601 Return -1 on error, or 0 on success. */
602 static int
fill_number(_PyUnicodeWriter * writer,const NumberFieldWidths * spec,PyObject * digits,Py_ssize_t d_start,PyObject * prefix,Py_ssize_t p_start,Py_UCS4 fill_char,LocaleInfo * locale,int toupper)603 fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
604 PyObject *digits, Py_ssize_t d_start,
605 PyObject *prefix, Py_ssize_t p_start,
606 Py_UCS4 fill_char,
607 LocaleInfo *locale, int toupper)
608 {
609 /* Used to keep track of digits, decimal, and remainder. */
610 Py_ssize_t d_pos = d_start;
611 const unsigned int kind = writer->kind;
612 const void *data = writer->data;
613 Py_ssize_t r;
614
615 if (spec->n_lpadding) {
616 _PyUnicode_FastFill(writer->buffer,
617 writer->pos, spec->n_lpadding, fill_char);
618 writer->pos += spec->n_lpadding;
619 }
620 if (spec->n_sign == 1) {
621 PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
622 writer->pos++;
623 }
624 if (spec->n_prefix) {
625 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
626 prefix, p_start,
627 spec->n_prefix);
628 if (toupper) {
629 Py_ssize_t t;
630 for (t = 0; t < spec->n_prefix; t++) {
631 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
632 c = Py_TOUPPER(c);
633 assert (c <= 127);
634 PyUnicode_WRITE(kind, data, writer->pos + t, c);
635 }
636 }
637 writer->pos += spec->n_prefix;
638 }
639 if (spec->n_spadding) {
640 _PyUnicode_FastFill(writer->buffer,
641 writer->pos, spec->n_spadding, fill_char);
642 writer->pos += spec->n_spadding;
643 }
644
645 /* Only for type 'c' special case, it has no digits. */
646 if (spec->n_digits != 0) {
647 /* Fill the digits with InsertThousandsGrouping. */
648 r = _PyUnicode_InsertThousandsGrouping(
649 writer, spec->n_grouped_digits,
650 digits, d_pos, spec->n_digits,
651 spec->n_min_width,
652 locale->grouping, locale->thousands_sep, NULL);
653 if (r == -1)
654 return -1;
655 assert(r == spec->n_grouped_digits);
656 d_pos += spec->n_digits;
657 }
658 if (toupper) {
659 Py_ssize_t t;
660 for (t = 0; t < spec->n_grouped_digits; t++) {
661 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
662 c = Py_TOUPPER(c);
663 if (c > 127) {
664 PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
665 return -1;
666 }
667 PyUnicode_WRITE(kind, data, writer->pos + t, c);
668 }
669 }
670 writer->pos += spec->n_grouped_digits;
671
672 if (spec->n_decimal) {
673 _PyUnicode_FastCopyCharacters(
674 writer->buffer, writer->pos,
675 locale->decimal_point, 0, spec->n_decimal);
676 writer->pos += spec->n_decimal;
677 d_pos += 1;
678 }
679
680 if (spec->n_remainder) {
681 _PyUnicode_FastCopyCharacters(
682 writer->buffer, writer->pos,
683 digits, d_pos, spec->n_remainder);
684 writer->pos += spec->n_remainder;
685 /* d_pos += spec->n_remainder; */
686 }
687
688 if (spec->n_rpadding) {
689 _PyUnicode_FastFill(writer->buffer,
690 writer->pos, spec->n_rpadding,
691 fill_char);
692 writer->pos += spec->n_rpadding;
693 }
694 return 0;
695 }
696
/* Grouping description used when no grouping is wanted (LT_NO_LOCALE):
   a single CHAR_MAX element, the localeconv() convention for "no
   further grouping". */
static const char no_grouping[1] = { CHAR_MAX };
698
699 /* Find the decimal point character(s?), thousands_separator(s?), and
700 grouping description, either for the current locale if type is
701 LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
702 LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
703 static int
get_locale_info(enum LocaleType type,LocaleInfo * locale_info)704 get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
705 {
706 switch (type) {
707 case LT_CURRENT_LOCALE: {
708 struct lconv *lc = localeconv();
709 if (_Py_GetLocaleconvNumeric(lc,
710 &locale_info->decimal_point,
711 &locale_info->thousands_sep) < 0) {
712 return -1;
713 }
714
715 /* localeconv() grouping can become a dangling pointer or point
716 to a different string if another thread calls localeconv() during
717 the string formatting. Copy the string to avoid this risk. */
718 locale_info->grouping_buffer = _PyMem_Strdup(lc->grouping);
719 if (locale_info->grouping_buffer == NULL) {
720 PyErr_NoMemory();
721 return -1;
722 }
723 locale_info->grouping = locale_info->grouping_buffer;
724 break;
725 }
726 case LT_DEFAULT_LOCALE:
727 case LT_UNDERSCORE_LOCALE:
728 case LT_UNDER_FOUR_LOCALE:
729 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
730 locale_info->thousands_sep = PyUnicode_FromOrdinal(
731 type == LT_DEFAULT_LOCALE ? ',' : '_');
732 if (!locale_info->decimal_point || !locale_info->thousands_sep)
733 return -1;
734 if (type != LT_UNDER_FOUR_LOCALE)
735 locale_info->grouping = "\3"; /* Group every 3 characters. The
736 (implicit) trailing 0 means repeat
737 infinitely. */
738 else
739 locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
740 break;
741 case LT_NO_LOCALE:
742 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
743 locale_info->thousands_sep = PyUnicode_New(0, 0);
744 if (!locale_info->decimal_point || !locale_info->thousands_sep)
745 return -1;
746 locale_info->grouping = no_grouping;
747 break;
748 }
749 return 0;
750 }
751
752 static void
free_locale_info(LocaleInfo * locale_info)753 free_locale_info(LocaleInfo *locale_info)
754 {
755 Py_XDECREF(locale_info->decimal_point);
756 Py_XDECREF(locale_info->thousands_sep);
757 PyMem_Free(locale_info->grouping_buffer);
758 }
759
760 /************************************************************************/
761 /*********** string formatting ******************************************/
762 /************************************************************************/
763
764 static int
format_string_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)765 format_string_internal(PyObject *value, const InternalFormatSpec *format,
766 _PyUnicodeWriter *writer)
767 {
768 Py_ssize_t lpad;
769 Py_ssize_t rpad;
770 Py_ssize_t total;
771 Py_ssize_t len;
772 int result = -1;
773 Py_UCS4 maxchar;
774
775 assert(PyUnicode_IS_READY(value));
776 len = PyUnicode_GET_LENGTH(value);
777
778 /* sign is not allowed on strings */
779 if (format->sign != '\0') {
780 if (format->sign == ' ') {
781 PyErr_SetString(PyExc_ValueError,
782 "Space not allowed in string format specifier");
783 }
784 else {
785 PyErr_SetString(PyExc_ValueError,
786 "Sign not allowed in string format specifier");
787 }
788 goto done;
789 }
790
791 /* negative 0 coercion is not allowed on strings */
792 if (format->no_neg_0) {
793 PyErr_SetString(PyExc_ValueError,
794 "Negative zero coercion (z) not allowed in string format "
795 "specifier");
796 goto done;
797 }
798
799 /* alternate is not allowed on strings */
800 if (format->alternate) {
801 PyErr_SetString(PyExc_ValueError,
802 "Alternate form (#) not allowed in string format "
803 "specifier");
804 goto done;
805 }
806
807 /* '=' alignment not allowed on strings */
808 if (format->align == '=') {
809 PyErr_SetString(PyExc_ValueError,
810 "'=' alignment not allowed "
811 "in string format specifier");
812 goto done;
813 }
814
815 if ((format->width == -1 || format->width <= len)
816 && (format->precision == -1 || format->precision >= len)) {
817 /* Fast path */
818 return _PyUnicodeWriter_WriteStr(writer, value);
819 }
820
821 /* if precision is specified, output no more that format.precision
822 characters */
823 if (format->precision >= 0 && len >= format->precision) {
824 len = format->precision;
825 }
826
827 calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
828
829 maxchar = writer->maxchar;
830 if (lpad != 0 || rpad != 0)
831 maxchar = Py_MAX(maxchar, format->fill_char);
832 if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
833 Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len);
834 maxchar = Py_MAX(maxchar, valmaxchar);
835 }
836
837 /* allocate the resulting string */
838 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
839 goto done;
840
841 /* Write into that space. First the padding. */
842 result = fill_padding(writer, len, format->fill_char, lpad, rpad);
843 if (result == -1)
844 goto done;
845
846 /* Then the source string. */
847 if (len) {
848 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
849 value, 0, len);
850 }
851 writer->pos += (len + rpad);
852 result = 0;
853
854 done:
855 return result;
856 }
857
858
859 /************************************************************************/
860 /*********** long formatting ********************************************/
861 /************************************************************************/
862
863 static int
format_long_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)864 format_long_internal(PyObject *value, const InternalFormatSpec *format,
865 _PyUnicodeWriter *writer)
866 {
867 int result = -1;
868 Py_UCS4 maxchar = 127;
869 PyObject *tmp = NULL;
870 Py_ssize_t inumeric_chars;
871 Py_UCS4 sign_char = '\0';
872 Py_ssize_t n_digits; /* count of digits need from the computed
873 string */
874 Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
875 produces non-digits */
876 Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
877 Py_ssize_t n_total;
878 Py_ssize_t prefix = 0;
879 NumberFieldWidths spec;
880 long x;
881
882 /* Locale settings, either from the actual locale or
883 from a hard-code pseudo-locale */
884 LocaleInfo locale = LocaleInfo_STATIC_INIT;
885
886 /* no precision allowed on integers */
887 if (format->precision != -1) {
888 PyErr_SetString(PyExc_ValueError,
889 "Precision not allowed in integer format specifier");
890 goto done;
891 }
892 /* no negative zero coercion on integers */
893 if (format->no_neg_0) {
894 PyErr_SetString(PyExc_ValueError,
895 "Negative zero coercion (z) not allowed in integer"
896 " format specifier");
897 goto done;
898 }
899
900 /* special case for character formatting */
901 if (format->type == 'c') {
902 /* error to specify a sign */
903 if (format->sign != '\0') {
904 PyErr_SetString(PyExc_ValueError,
905 "Sign not allowed with integer"
906 " format specifier 'c'");
907 goto done;
908 }
909 /* error to request alternate format */
910 if (format->alternate) {
911 PyErr_SetString(PyExc_ValueError,
912 "Alternate form (#) not allowed with integer"
913 " format specifier 'c'");
914 goto done;
915 }
916
917 /* taken from unicodeobject.c formatchar() */
918 /* Integer input truncated to a character */
919 x = PyLong_AsLong(value);
920 if (x == -1 && PyErr_Occurred())
921 goto done;
922 if (x < 0 || x > 0x10ffff) {
923 PyErr_SetString(PyExc_OverflowError,
924 "%c arg not in range(0x110000)");
925 goto done;
926 }
927 tmp = PyUnicode_FromOrdinal(x);
928 inumeric_chars = 0;
929 n_digits = 1;
930 maxchar = Py_MAX(maxchar, (Py_UCS4)x);
931
932 /* As a sort-of hack, we tell calc_number_widths that we only
933 have "remainder" characters. calc_number_widths thinks
934 these are characters that don't get formatted, only copied
935 into the output string. We do this for 'c' formatting,
936 because the characters are likely to be non-digits. */
937 n_remainder = 1;
938 }
939 else {
940 int base;
941 int leading_chars_to_skip = 0; /* Number of characters added by
942 PyNumber_ToBase that we want to
943 skip over. */
944
945 /* Compute the base and how many characters will be added by
946 PyNumber_ToBase */
947 switch (format->type) {
948 case 'b':
949 base = 2;
950 leading_chars_to_skip = 2; /* 0b */
951 break;
952 case 'o':
953 base = 8;
954 leading_chars_to_skip = 2; /* 0o */
955 break;
956 case 'x':
957 case 'X':
958 base = 16;
959 leading_chars_to_skip = 2; /* 0x */
960 break;
961 default: /* shouldn't be needed, but stops a compiler warning */
962 case 'd':
963 case 'n':
964 base = 10;
965 break;
966 }
967
968 if (format->sign != '+' && format->sign != ' '
969 && format->width == -1
970 && format->type != 'X' && format->type != 'n'
971 && !format->thousands_separators
972 && PyLong_CheckExact(value))
973 {
974 /* Fast path */
975 return _PyLong_FormatWriter(writer, value, base, format->alternate);
976 }
977
978 /* The number of prefix chars is the same as the leading
979 chars to skip */
980 if (format->alternate)
981 n_prefix = leading_chars_to_skip;
982
983 /* Do the hard part, converting to a string in a given base */
984 tmp = _PyLong_Format(value, base);
985 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
986 goto done;
987
988 inumeric_chars = 0;
989 n_digits = PyUnicode_GET_LENGTH(tmp);
990
991 prefix = inumeric_chars;
992
993 /* Is a sign character present in the output? If so, remember it
994 and skip it */
995 if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
996 sign_char = '-';
997 ++prefix;
998 ++leading_chars_to_skip;
999 }
1000
1001 /* Skip over the leading chars (0x, 0b, etc.) */
1002 n_digits -= leading_chars_to_skip;
1003 inumeric_chars += leading_chars_to_skip;
1004 }
1005
1006 /* Determine the grouping, separator, and decimal point, if any. */
1007 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1008 format->thousands_separators,
1009 &locale) == -1)
1010 goto done;
1011
1012 /* Calculate how much memory we'll need. */
1013 n_total = calc_number_widths(&spec, n_prefix, sign_char, inumeric_chars,
1014 inumeric_chars + n_digits, n_remainder, 0,
1015 &locale, format, &maxchar);
1016 if (n_total == -1) {
1017 goto done;
1018 }
1019
1020 /* Allocate the memory. */
1021 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
1022 goto done;
1023
1024 /* Populate the memory. */
1025 result = fill_number(writer, &spec,
1026 tmp, inumeric_chars,
1027 tmp, prefix, format->fill_char,
1028 &locale, format->type == 'X');
1029
1030 done:
1031 Py_XDECREF(tmp);
1032 free_locale_info(&locale);
1033 return result;
1034 }
1035
1036 /************************************************************************/
1037 /*********** float formatting *******************************************/
1038 /************************************************************************/
1039
1040 /* much of this is taken from unicodeobject.c */
1041 static int
format_float_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)1042 format_float_internal(PyObject *value,
1043 const InternalFormatSpec *format,
1044 _PyUnicodeWriter *writer)
1045 {
1046 char *buf = NULL; /* buffer returned from PyOS_double_to_string */
1047 Py_ssize_t n_digits;
1048 Py_ssize_t n_remainder;
1049 Py_ssize_t n_total;
1050 int has_decimal;
1051 double val;
1052 int precision, default_precision = 6;
1053 Py_UCS4 type = format->type;
1054 int add_pct = 0;
1055 Py_ssize_t index;
1056 NumberFieldWidths spec;
1057 int flags = 0;
1058 int result = -1;
1059 Py_UCS4 maxchar = 127;
1060 Py_UCS4 sign_char = '\0';
1061 int float_type; /* Used to see if we have a nan, inf, or regular float. */
1062 PyObject *unicode_tmp = NULL;
1063
1064 /* Locale settings, either from the actual locale or
1065 from a hard-code pseudo-locale */
1066 LocaleInfo locale = LocaleInfo_STATIC_INIT;
1067
1068 if (format->precision > INT_MAX) {
1069 PyErr_SetString(PyExc_ValueError, "precision too big");
1070 goto done;
1071 }
1072 precision = (int)format->precision;
1073
1074 if (format->alternate)
1075 flags |= Py_DTSF_ALT;
1076 if (format->no_neg_0)
1077 flags |= Py_DTSF_NO_NEG_0;
1078
1079 if (type == '\0') {
1080 /* Omitted type specifier. Behaves in the same way as repr(x)
1081 and str(x) if no precision is given, else like 'g', but with
1082 at least one digit after the decimal point. */
1083 flags |= Py_DTSF_ADD_DOT_0;
1084 type = 'r';
1085 default_precision = 0;
1086 }
1087
1088 if (type == 'n')
1089 /* 'n' is the same as 'g', except for the locale used to
1090 format the result. We take care of that later. */
1091 type = 'g';
1092
1093 val = PyFloat_AsDouble(value);
1094 if (val == -1.0 && PyErr_Occurred())
1095 goto done;
1096
1097 if (type == '%') {
1098 type = 'f';
1099 val *= 100;
1100 add_pct = 1;
1101 }
1102
1103 if (precision < 0)
1104 precision = default_precision;
1105 else if (type == 'r')
1106 type = 'g';
1107
1108 /* Cast "type", because if we're in unicode we need to pass an
1109 8-bit char. This is safe, because we've restricted what "type"
1110 can be. */
1111 buf = PyOS_double_to_string(val, (char)type, precision, flags,
1112 &float_type);
1113 if (buf == NULL)
1114 goto done;
1115 n_digits = strlen(buf);
1116
1117 if (add_pct) {
1118 /* We know that buf has a trailing zero (since we just called
1119 strlen() on it), and we don't use that fact any more. So we
1120 can just write over the trailing zero. */
1121 buf[n_digits] = '%';
1122 n_digits += 1;
1123 }
1124
1125 if (format->sign != '+' && format->sign != ' '
1126 && format->width == -1
1127 && format->type != 'n'
1128 && !format->thousands_separators)
1129 {
1130 /* Fast path */
1131 result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
1132 PyMem_Free(buf);
1133 return result;
1134 }
1135
1136 /* Since there is no unicode version of PyOS_double_to_string,
1137 just use the 8 bit version and then convert to unicode. */
1138 unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
1139 PyMem_Free(buf);
1140 if (unicode_tmp == NULL)
1141 goto done;
1142
1143 /* Is a sign character present in the output? If so, remember it
1144 and skip it */
1145 index = 0;
1146 if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1147 sign_char = '-';
1148 ++index;
1149 --n_digits;
1150 }
1151
1152 /* Determine if we have any "remainder" (after the digits, might include
1153 decimal or exponent or both (or neither)) */
1154 parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1155
1156 /* Determine the grouping, separator, and decimal point, if any. */
1157 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1158 format->thousands_separators,
1159 &locale) == -1)
1160 goto done;
1161
1162 /* Calculate how much memory we'll need. */
1163 n_total = calc_number_widths(&spec, 0, sign_char, index,
1164 index + n_digits, n_remainder, has_decimal,
1165 &locale, format, &maxchar);
1166 if (n_total == -1) {
1167 goto done;
1168 }
1169
1170 /* Allocate the memory. */
1171 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
1172 goto done;
1173
1174 /* Populate the memory. */
1175 result = fill_number(writer, &spec,
1176 unicode_tmp, index,
1177 NULL, 0, format->fill_char,
1178 &locale, 0);
1179
1180 done:
1181 Py_XDECREF(unicode_tmp);
1182 free_locale_info(&locale);
1183 return result;
1184 }
1185
1186 /************************************************************************/
1187 /*********** complex formatting *****************************************/
1188 /************************************************************************/
1189
1190 static int
format_complex_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)1191 format_complex_internal(PyObject *value,
1192 const InternalFormatSpec *format,
1193 _PyUnicodeWriter *writer)
1194 {
1195 double re;
1196 double im;
1197 char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */
1198 char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */
1199
1200 InternalFormatSpec tmp_format = *format;
1201 Py_ssize_t n_re_digits;
1202 Py_ssize_t n_im_digits;
1203 Py_ssize_t n_re_remainder;
1204 Py_ssize_t n_im_remainder;
1205 Py_ssize_t n_re_total;
1206 Py_ssize_t n_im_total;
1207 int re_has_decimal;
1208 int im_has_decimal;
1209 int precision, default_precision = 6;
1210 Py_UCS4 type = format->type;
1211 Py_ssize_t i_re;
1212 Py_ssize_t i_im;
1213 NumberFieldWidths re_spec;
1214 NumberFieldWidths im_spec;
1215 int flags = 0;
1216 int result = -1;
1217 Py_UCS4 maxchar = 127;
1218 enum PyUnicode_Kind rkind;
1219 void *rdata;
1220 Py_UCS4 re_sign_char = '\0';
1221 Py_UCS4 im_sign_char = '\0';
1222 int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1223 int im_float_type;
1224 int add_parens = 0;
1225 int skip_re = 0;
1226 Py_ssize_t lpad;
1227 Py_ssize_t rpad;
1228 Py_ssize_t total;
1229 PyObject *re_unicode_tmp = NULL;
1230 PyObject *im_unicode_tmp = NULL;
1231
1232 /* Locale settings, either from the actual locale or
1233 from a hard-code pseudo-locale */
1234 LocaleInfo locale = LocaleInfo_STATIC_INIT;
1235
1236 if (format->precision > INT_MAX) {
1237 PyErr_SetString(PyExc_ValueError, "precision too big");
1238 goto done;
1239 }
1240 precision = (int)format->precision;
1241
1242 /* Zero padding is not allowed. */
1243 if (format->fill_char == '0') {
1244 PyErr_SetString(PyExc_ValueError,
1245 "Zero padding is not allowed in complex format "
1246 "specifier");
1247 goto done;
1248 }
1249
1250 /* Neither is '=' alignment . */
1251 if (format->align == '=') {
1252 PyErr_SetString(PyExc_ValueError,
1253 "'=' alignment flag is not allowed in complex format "
1254 "specifier");
1255 goto done;
1256 }
1257
1258 re = PyComplex_RealAsDouble(value);
1259 if (re == -1.0 && PyErr_Occurred())
1260 goto done;
1261 im = PyComplex_ImagAsDouble(value);
1262 if (im == -1.0 && PyErr_Occurred())
1263 goto done;
1264
1265 if (format->alternate)
1266 flags |= Py_DTSF_ALT;
1267 if (format->no_neg_0)
1268 flags |= Py_DTSF_NO_NEG_0;
1269
1270 if (type == '\0') {
1271 /* Omitted type specifier. Should be like str(self). */
1272 type = 'r';
1273 default_precision = 0;
1274 if (re == 0.0 && copysign(1.0, re) == 1.0)
1275 skip_re = 1;
1276 else
1277 add_parens = 1;
1278 }
1279
1280 if (type == 'n')
1281 /* 'n' is the same as 'g', except for the locale used to
1282 format the result. We take care of that later. */
1283 type = 'g';
1284
1285 if (precision < 0)
1286 precision = default_precision;
1287 else if (type == 'r')
1288 type = 'g';
1289
1290 /* Cast "type", because if we're in unicode we need to pass an
1291 8-bit char. This is safe, because we've restricted what "type"
1292 can be. */
1293 re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1294 &re_float_type);
1295 if (re_buf == NULL)
1296 goto done;
1297 im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1298 &im_float_type);
1299 if (im_buf == NULL)
1300 goto done;
1301
1302 n_re_digits = strlen(re_buf);
1303 n_im_digits = strlen(im_buf);
1304
1305 /* Since there is no unicode version of PyOS_double_to_string,
1306 just use the 8 bit version and then convert to unicode. */
1307 re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
1308 if (re_unicode_tmp == NULL)
1309 goto done;
1310 i_re = 0;
1311
1312 im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
1313 if (im_unicode_tmp == NULL)
1314 goto done;
1315 i_im = 0;
1316
1317 /* Is a sign character present in the output? If so, remember it
1318 and skip it */
1319 if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1320 re_sign_char = '-';
1321 ++i_re;
1322 --n_re_digits;
1323 }
1324 if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1325 im_sign_char = '-';
1326 ++i_im;
1327 --n_im_digits;
1328 }
1329
1330 /* Determine if we have any "remainder" (after the digits, might include
1331 decimal or exponent or both (or neither)) */
1332 parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
1333 &n_re_remainder, &re_has_decimal);
1334 parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
1335 &n_im_remainder, &im_has_decimal);
1336
1337 /* Determine the grouping, separator, and decimal point, if any. */
1338 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1339 format->thousands_separators,
1340 &locale) == -1)
1341 goto done;
1342
1343 /* Turn off any padding. We'll do it later after we've composed
1344 the numbers without padding. */
1345 tmp_format.fill_char = '\0';
1346 tmp_format.align = '<';
1347 tmp_format.width = -1;
1348
1349 /* Calculate how much memory we'll need. */
1350 n_re_total = calc_number_widths(&re_spec, 0, re_sign_char,
1351 i_re, i_re + n_re_digits, n_re_remainder,
1352 re_has_decimal, &locale, &tmp_format,
1353 &maxchar);
1354 if (n_re_total == -1) {
1355 goto done;
1356 }
1357
1358 /* Same formatting, but always include a sign, unless the real part is
1359 * going to be omitted, in which case we use whatever sign convention was
1360 * requested by the original format. */
1361 if (!skip_re)
1362 tmp_format.sign = '+';
1363 n_im_total = calc_number_widths(&im_spec, 0, im_sign_char,
1364 i_im, i_im + n_im_digits, n_im_remainder,
1365 im_has_decimal, &locale, &tmp_format,
1366 &maxchar);
1367 if (n_im_total == -1) {
1368 goto done;
1369 }
1370
1371 if (skip_re)
1372 n_re_total = 0;
1373
1374 /* Add 1 for the 'j', and optionally 2 for parens. */
1375 calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1376 format->width, format->align, &lpad, &rpad, &total);
1377
1378 if (lpad || rpad)
1379 maxchar = Py_MAX(maxchar, format->fill_char);
1380
1381 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
1382 goto done;
1383 rkind = writer->kind;
1384 rdata = writer->data;
1385
1386 /* Populate the memory. First, the padding. */
1387 result = fill_padding(writer,
1388 n_re_total + n_im_total + 1 + add_parens * 2,
1389 format->fill_char, lpad, rpad);
1390 if (result == -1)
1391 goto done;
1392
1393 if (add_parens) {
1394 PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
1395 writer->pos++;
1396 }
1397
1398 if (!skip_re) {
1399 result = fill_number(writer, &re_spec,
1400 re_unicode_tmp, i_re,
1401 NULL, 0,
1402 0,
1403 &locale, 0);
1404 if (result == -1)
1405 goto done;
1406 }
1407 result = fill_number(writer, &im_spec,
1408 im_unicode_tmp, i_im,
1409 NULL, 0,
1410 0,
1411 &locale, 0);
1412 if (result == -1)
1413 goto done;
1414 PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
1415 writer->pos++;
1416
1417 if (add_parens) {
1418 PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
1419 writer->pos++;
1420 }
1421
1422 writer->pos += rpad;
1423
1424 done:
1425 PyMem_Free(re_buf);
1426 PyMem_Free(im_buf);
1427 Py_XDECREF(re_unicode_tmp);
1428 Py_XDECREF(im_unicode_tmp);
1429 free_locale_info(&locale);
1430 return result;
1431 }
1432
1433 /************************************************************************/
1434 /*********** built in formatters ****************************************/
1435 /************************************************************************/
1436 static int
format_obj(PyObject * obj,_PyUnicodeWriter * writer)1437 format_obj(PyObject *obj, _PyUnicodeWriter *writer)
1438 {
1439 PyObject *str;
1440 int err;
1441
1442 str = PyObject_Str(obj);
1443 if (str == NULL)
1444 return -1;
1445 err = _PyUnicodeWriter_WriteStr(writer, str);
1446 Py_DECREF(str);
1447 return err;
1448 }
1449
1450 int
_PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1451 _PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1452 PyObject *obj,
1453 PyObject *format_spec,
1454 Py_ssize_t start, Py_ssize_t end)
1455 {
1456 InternalFormatSpec format;
1457
1458 assert(PyUnicode_Check(obj));
1459
1460 /* check for the special case of zero length format spec, make
1461 it equivalent to str(obj) */
1462 if (start == end) {
1463 if (PyUnicode_CheckExact(obj))
1464 return _PyUnicodeWriter_WriteStr(writer, obj);
1465 else
1466 return format_obj(obj, writer);
1467 }
1468
1469 /* parse the format_spec */
1470 if (!parse_internal_render_format_spec(obj, format_spec, start, end,
1471 &format, 's', '<'))
1472 return -1;
1473
1474 /* type conversion? */
1475 switch (format.type) {
1476 case 's':
1477 /* no type conversion needed, already a string. do the formatting */
1478 return format_string_internal(obj, &format, writer);
1479 default:
1480 /* unknown */
1481 unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1482 return -1;
1483 }
1484 }
1485
1486 int
_PyLong_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1487 _PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1488 PyObject *obj,
1489 PyObject *format_spec,
1490 Py_ssize_t start, Py_ssize_t end)
1491 {
1492 PyObject *tmp = NULL;
1493 InternalFormatSpec format;
1494 int result = -1;
1495
1496 /* check for the special case of zero length format spec, make
1497 it equivalent to str(obj) */
1498 if (start == end) {
1499 if (PyLong_CheckExact(obj))
1500 return _PyLong_FormatWriter(writer, obj, 10, 0);
1501 else
1502 return format_obj(obj, writer);
1503 }
1504
1505 /* parse the format_spec */
1506 if (!parse_internal_render_format_spec(obj, format_spec, start, end,
1507 &format, 'd', '>'))
1508 goto done;
1509
1510 /* type conversion? */
1511 switch (format.type) {
1512 case 'b':
1513 case 'c':
1514 case 'd':
1515 case 'o':
1516 case 'x':
1517 case 'X':
1518 case 'n':
1519 /* no type conversion needed, already an int. do the formatting */
1520 result = format_long_internal(obj, &format, writer);
1521 break;
1522
1523 case 'e':
1524 case 'E':
1525 case 'f':
1526 case 'F':
1527 case 'g':
1528 case 'G':
1529 case '%':
1530 /* convert to float */
1531 tmp = PyNumber_Float(obj);
1532 if (tmp == NULL)
1533 goto done;
1534 result = format_float_internal(tmp, &format, writer);
1535 break;
1536
1537 default:
1538 /* unknown */
1539 unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1540 goto done;
1541 }
1542
1543 done:
1544 Py_XDECREF(tmp);
1545 return result;
1546 }
1547
1548 int
_PyFloat_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1549 _PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1550 PyObject *obj,
1551 PyObject *format_spec,
1552 Py_ssize_t start, Py_ssize_t end)
1553 {
1554 InternalFormatSpec format;
1555
1556 /* check for the special case of zero length format spec, make
1557 it equivalent to str(obj) */
1558 if (start == end)
1559 return format_obj(obj, writer);
1560
1561 /* parse the format_spec */
1562 if (!parse_internal_render_format_spec(obj, format_spec, start, end,
1563 &format, '\0', '>'))
1564 return -1;
1565
1566 /* type conversion? */
1567 switch (format.type) {
1568 case '\0': /* No format code: like 'g', but with at least one decimal. */
1569 case 'e':
1570 case 'E':
1571 case 'f':
1572 case 'F':
1573 case 'g':
1574 case 'G':
1575 case 'n':
1576 case '%':
1577 /* no conversion, already a float. do the formatting */
1578 return format_float_internal(obj, &format, writer);
1579
1580 default:
1581 /* unknown */
1582 unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1583 return -1;
1584 }
1585 }
1586
1587 int
_PyComplex_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1588 _PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1589 PyObject *obj,
1590 PyObject *format_spec,
1591 Py_ssize_t start, Py_ssize_t end)
1592 {
1593 InternalFormatSpec format;
1594
1595 /* check for the special case of zero length format spec, make
1596 it equivalent to str(obj) */
1597 if (start == end)
1598 return format_obj(obj, writer);
1599
1600 /* parse the format_spec */
1601 if (!parse_internal_render_format_spec(obj, format_spec, start, end,
1602 &format, '\0', '>'))
1603 return -1;
1604
1605 /* type conversion? */
1606 switch (format.type) {
1607 case '\0': /* No format code: like 'g', but with at least one decimal. */
1608 case 'e':
1609 case 'E':
1610 case 'f':
1611 case 'F':
1612 case 'g':
1613 case 'G':
1614 case 'n':
1615 /* no conversion, already a complex. do the formatting */
1616 return format_complex_internal(obj, &format, writer);
1617
1618 default:
1619 /* unknown */
1620 unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1621 return -1;
1622 }
1623 }
1624