1 /*
2 unicode_format.h -- implementation of str.format().
3 */
4
5 #include "pycore_floatobject.h" // _PyFloat_FormatAdvancedWriter()
6
7 /************************************************************************/
8 /*********** Global data structures and forward declarations *********/
9 /************************************************************************/
10
11 /*
12 A SubString consists of the characters between two string or
13 unicode pointers.
14 */
15 typedef struct {
16 PyObject *str; /* borrowed reference */
17 Py_ssize_t start, end;
18 } SubString;
19
20
/* Keep track if we're auto-numbering fields */
typedef enum {
    ANS_INIT = 0,   /* no numerically indexed field has been seen yet */
    ANS_AUTO = 1,   /* first numeric field had an empty name ("{}") */
    ANS_MANUAL = 2  /* first numeric field had an explicit number ("{0}") */
} AutoNumberState;
26
27 /* Keeps track of our auto-numbering state, and which number field we're on */
28 typedef struct {
29 AutoNumberState an_state;
30 int an_field_number;
31 } AutoNumber;
32
33
34 /* forward declaration for recursion */
35 static PyObject *
36 build_string(SubString *input, PyObject *args, PyObject *kwargs,
37 int recursion_depth, AutoNumber *auto_number);
38
39
40
41 /************************************************************************/
42 /************************** Utility functions ************************/
43 /************************************************************************/
44
45 static void
AutoNumber_Init(AutoNumber * auto_number)46 AutoNumber_Init(AutoNumber *auto_number)
47 {
48 auto_number->an_state = ANS_INIT;
49 auto_number->an_field_number = 0;
50 }
51
52 /* fill in a SubString from a pointer and length */
53 Py_LOCAL_INLINE(void)
SubString_init(SubString * str,PyObject * s,Py_ssize_t start,Py_ssize_t end)54 SubString_init(SubString *str, PyObject *s, Py_ssize_t start, Py_ssize_t end)
55 {
56 str->str = s;
57 str->start = start;
58 str->end = end;
59 }
60
61 /* return a new string. if str->str is NULL, return None */
62 Py_LOCAL_INLINE(PyObject *)
SubString_new_object(SubString * str)63 SubString_new_object(SubString *str)
64 {
65 if (str->str == NULL)
66 Py_RETURN_NONE;
67 return PyUnicode_Substring(str->str, str->start, str->end);
68 }
69
70 /* return a new string. if str->str is NULL, return a new empty string */
71 Py_LOCAL_INLINE(PyObject *)
SubString_new_object_or_empty(SubString * str)72 SubString_new_object_or_empty(SubString *str)
73 {
74 if (str->str == NULL) {
75 return PyUnicode_New(0, 0);
76 }
77 return SubString_new_object(str);
78 }
79
80 /* Return 1 if an error has been detected switching between automatic
81 field numbering and manual field specification, else return 0. Set
82 ValueError on error. */
83 static int
autonumber_state_error(AutoNumberState state,int field_name_is_empty)84 autonumber_state_error(AutoNumberState state, int field_name_is_empty)
85 {
86 if (state == ANS_MANUAL) {
87 if (field_name_is_empty) {
88 PyErr_SetString(PyExc_ValueError, "cannot switch from "
89 "manual field specification to "
90 "automatic field numbering");
91 return 1;
92 }
93 }
94 else {
95 if (!field_name_is_empty) {
96 PyErr_SetString(PyExc_ValueError, "cannot switch from "
97 "automatic field numbering to "
98 "manual field specification");
99 return 1;
100 }
101 }
102 return 0;
103 }
104
105
106 /************************************************************************/
107 /*********** Format string parsing -- integers and identifiers *********/
108 /************************************************************************/
109
110 static Py_ssize_t
get_integer(const SubString * str)111 get_integer(const SubString *str)
112 {
113 Py_ssize_t accumulator = 0;
114 Py_ssize_t digitval;
115 Py_ssize_t i;
116
117 /* empty string is an error */
118 if (str->start >= str->end)
119 return -1;
120
121 for (i = str->start; i < str->end; i++) {
122 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str->str, i));
123 if (digitval < 0)
124 return -1;
125 /*
126 Detect possible overflow before it happens:
127
128 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
129 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
130 */
131 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
132 PyErr_Format(PyExc_ValueError,
133 "Too many decimal digits in format string");
134 return -1;
135 }
136 accumulator = accumulator * 10 + digitval;
137 }
138 return accumulator;
139 }
140
141 /************************************************************************/
142 /******** Functions to get field objects and specification strings ******/
143 /************************************************************************/
144
145 /* do the equivalent of obj.name */
146 static PyObject *
getattr(PyObject * obj,SubString * name)147 getattr(PyObject *obj, SubString *name)
148 {
149 PyObject *newobj;
150 PyObject *str = SubString_new_object(name);
151 if (str == NULL)
152 return NULL;
153 newobj = PyObject_GetAttr(obj, str);
154 Py_DECREF(str);
155 return newobj;
156 }
157
158 /* do the equivalent of obj[idx], where obj is a sequence */
159 static PyObject *
getitem_sequence(PyObject * obj,Py_ssize_t idx)160 getitem_sequence(PyObject *obj, Py_ssize_t idx)
161 {
162 return PySequence_GetItem(obj, idx);
163 }
164
165 /* do the equivalent of obj[idx], where obj is not a sequence */
166 static PyObject *
getitem_idx(PyObject * obj,Py_ssize_t idx)167 getitem_idx(PyObject *obj, Py_ssize_t idx)
168 {
169 PyObject *newobj;
170 PyObject *idx_obj = PyLong_FromSsize_t(idx);
171 if (idx_obj == NULL)
172 return NULL;
173 newobj = PyObject_GetItem(obj, idx_obj);
174 Py_DECREF(idx_obj);
175 return newobj;
176 }
177
178 /* do the equivalent of obj[name] */
179 static PyObject *
getitem_str(PyObject * obj,SubString * name)180 getitem_str(PyObject *obj, SubString *name)
181 {
182 PyObject *newobj;
183 PyObject *str = SubString_new_object(name);
184 if (str == NULL)
185 return NULL;
186 newobj = PyObject_GetItem(obj, str);
187 Py_DECREF(str);
188 return newobj;
189 }
190
191 typedef struct {
192 /* the entire string we're parsing. we assume that someone else
193 is managing its lifetime, and that it will exist for the
194 lifetime of the iterator. can be empty */
195 SubString str;
196
197 /* index to where we are inside field_name */
198 Py_ssize_t index;
199 } FieldNameIterator;
200
201
202 static int
FieldNameIterator_init(FieldNameIterator * self,PyObject * s,Py_ssize_t start,Py_ssize_t end)203 FieldNameIterator_init(FieldNameIterator *self, PyObject *s,
204 Py_ssize_t start, Py_ssize_t end)
205 {
206 SubString_init(&self->str, s, start, end);
207 self->index = start;
208 return 1;
209 }
210
211 static int
_FieldNameIterator_attr(FieldNameIterator * self,SubString * name)212 _FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
213 {
214 Py_UCS4 c;
215
216 name->str = self->str.str;
217 name->start = self->index;
218
219 /* return everything until '.' or '[' */
220 while (self->index < self->str.end) {
221 c = PyUnicode_READ_CHAR(self->str.str, self->index++);
222 switch (c) {
223 case '[':
224 case '.':
225 /* backup so that we this character will be seen next time */
226 self->index--;
227 break;
228 default:
229 continue;
230 }
231 break;
232 }
233 /* end of string is okay */
234 name->end = self->index;
235 return 1;
236 }
237
238 static int
_FieldNameIterator_item(FieldNameIterator * self,SubString * name)239 _FieldNameIterator_item(FieldNameIterator *self, SubString *name)
240 {
241 int bracket_seen = 0;
242 Py_UCS4 c;
243
244 name->str = self->str.str;
245 name->start = self->index;
246
247 /* return everything until ']' */
248 while (self->index < self->str.end) {
249 c = PyUnicode_READ_CHAR(self->str.str, self->index++);
250 switch (c) {
251 case ']':
252 bracket_seen = 1;
253 break;
254 default:
255 continue;
256 }
257 break;
258 }
259 /* make sure we ended with a ']' */
260 if (!bracket_seen) {
261 PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
262 return 0;
263 }
264
265 /* end of string is okay */
266 /* don't include the ']' */
267 name->end = self->index-1;
268 return 1;
269 }
270
271 /* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
272 static int
FieldNameIterator_next(FieldNameIterator * self,int * is_attribute,Py_ssize_t * name_idx,SubString * name)273 FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
274 Py_ssize_t *name_idx, SubString *name)
275 {
276 /* check at end of input */
277 if (self->index >= self->str.end)
278 return 1;
279
280 switch (PyUnicode_READ_CHAR(self->str.str, self->index++)) {
281 case '.':
282 *is_attribute = 1;
283 if (_FieldNameIterator_attr(self, name) == 0)
284 return 0;
285 *name_idx = -1;
286 break;
287 case '[':
288 *is_attribute = 0;
289 if (_FieldNameIterator_item(self, name) == 0)
290 return 0;
291 *name_idx = get_integer(name);
292 if (*name_idx == -1 && PyErr_Occurred())
293 return 0;
294 break;
295 default:
296 /* Invalid character follows ']' */
297 PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
298 "follow ']' in format field specifier");
299 return 0;
300 }
301
302 /* empty string is an error */
303 if (name->start == name->end) {
304 PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
305 return 0;
306 }
307
308 return 2;
309 }
310
311
312 /* input: field_name
313 output: 'first' points to the part before the first '[' or '.'
314 'first_idx' is -1 if 'first' is not an integer, otherwise
315 it's the value of first converted to an integer
316 'rest' is an iterator to return the rest
317 */
318 static int
field_name_split(PyObject * str,Py_ssize_t start,Py_ssize_t end,SubString * first,Py_ssize_t * first_idx,FieldNameIterator * rest,AutoNumber * auto_number)319 field_name_split(PyObject *str, Py_ssize_t start, Py_ssize_t end, SubString *first,
320 Py_ssize_t *first_idx, FieldNameIterator *rest,
321 AutoNumber *auto_number)
322 {
323 Py_UCS4 c;
324 Py_ssize_t i = start;
325 int field_name_is_empty;
326 int using_numeric_index;
327
328 /* find the part up until the first '.' or '[' */
329 while (i < end) {
330 switch (c = PyUnicode_READ_CHAR(str, i++)) {
331 case '[':
332 case '.':
333 /* backup so that we this character is available to the
334 "rest" iterator */
335 i--;
336 break;
337 default:
338 continue;
339 }
340 break;
341 }
342
343 /* set up the return values */
344 SubString_init(first, str, start, i);
345 FieldNameIterator_init(rest, str, i, end);
346
347 /* see if "first" is an integer, in which case it's used as an index */
348 *first_idx = get_integer(first);
349 if (*first_idx == -1 && PyErr_Occurred())
350 return 0;
351
352 field_name_is_empty = first->start >= first->end;
353
354 /* If the field name is omitted or if we have a numeric index
355 specified, then we're doing numeric indexing into args. */
356 using_numeric_index = field_name_is_empty || *first_idx != -1;
357
358 /* We always get here exactly one time for each field we're
359 processing. And we get here in field order (counting by left
360 braces). So this is the perfect place to handle automatic field
361 numbering if the field name is omitted. */
362
363 /* Check if we need to do the auto-numbering. It's not needed if
364 we're called from string.Format routines, because it's handled
365 in that class by itself. */
366 if (auto_number) {
367 /* Initialize our auto numbering state if this is the first
368 time we're either auto-numbering or manually numbering. */
369 if (auto_number->an_state == ANS_INIT && using_numeric_index)
370 auto_number->an_state = field_name_is_empty ?
371 ANS_AUTO : ANS_MANUAL;
372
373 /* Make sure our state is consistent with what we're doing
374 this time through. Only check if we're using a numeric
375 index. */
376 if (using_numeric_index)
377 if (autonumber_state_error(auto_number->an_state,
378 field_name_is_empty))
379 return 0;
380 /* Zero length field means we want to do auto-numbering of the
381 fields. */
382 if (field_name_is_empty)
383 *first_idx = (auto_number->an_field_number)++;
384 }
385
386 return 1;
387 }
388
389
390 /*
391 get_field_object returns the object inside {}, before the
392 format_spec. It handles getindex and getattr lookups and consumes
393 the entire input string.
394 */
395 static PyObject *
get_field_object(SubString * input,PyObject * args,PyObject * kwargs,AutoNumber * auto_number)396 get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
397 AutoNumber *auto_number)
398 {
399 PyObject *obj = NULL;
400 int ok;
401 int is_attribute;
402 SubString name;
403 SubString first;
404 Py_ssize_t index;
405 FieldNameIterator rest;
406
407 if (!field_name_split(input->str, input->start, input->end, &first,
408 &index, &rest, auto_number)) {
409 goto error;
410 }
411
412 if (index == -1) {
413 /* look up in kwargs */
414 PyObject *key = SubString_new_object(&first);
415 if (key == NULL) {
416 goto error;
417 }
418 if (kwargs == NULL) {
419 PyErr_SetObject(PyExc_KeyError, key);
420 Py_DECREF(key);
421 goto error;
422 }
423 /* Use PyObject_GetItem instead of PyDict_GetItem because this
424 code is no longer just used with kwargs. It might be passed
425 a non-dict when called through format_map. */
426 obj = PyObject_GetItem(kwargs, key);
427 Py_DECREF(key);
428 if (obj == NULL) {
429 goto error;
430 }
431 }
432 else {
433 /* If args is NULL, we have a format string with a positional field
434 with only kwargs to retrieve it from. This can only happen when
435 used with format_map(), where positional arguments are not
436 allowed. */
437 if (args == NULL) {
438 PyErr_SetString(PyExc_ValueError, "Format string contains "
439 "positional fields");
440 goto error;
441 }
442
443 /* look up in args */
444 obj = PySequence_GetItem(args, index);
445 if (obj == NULL) {
446 PyErr_Format(PyExc_IndexError,
447 "Replacement index %zd out of range for positional "
448 "args tuple",
449 index);
450 goto error;
451 }
452 }
453
454 /* iterate over the rest of the field_name */
455 while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
456 &name)) == 2) {
457 PyObject *tmp;
458
459 if (is_attribute)
460 /* getattr lookup "." */
461 tmp = getattr(obj, &name);
462 else
463 /* getitem lookup "[]" */
464 if (index == -1)
465 tmp = getitem_str(obj, &name);
466 else
467 if (PySequence_Check(obj))
468 tmp = getitem_sequence(obj, index);
469 else
470 /* not a sequence */
471 tmp = getitem_idx(obj, index);
472 if (tmp == NULL)
473 goto error;
474
475 /* assign to obj */
476 Py_DECREF(obj);
477 obj = tmp;
478 }
479 /* end of iterator, this is the non-error case */
480 if (ok == 1)
481 return obj;
482 error:
483 Py_XDECREF(obj);
484 return NULL;
485 }
486
487 /************************************************************************/
488 /***************** Field rendering functions **************************/
489 /************************************************************************/
490
491 /*
492 render_field() is the main function in this section. It takes the
493 field object and field specification string generated by
494 get_field_and_spec, and renders the field into the output string.
495
496 render_field calls fieldobj.__format__(format_spec) method, and
497 appends to the output.
498 */
499 static int
render_field(PyObject * fieldobj,SubString * format_spec,_PyUnicodeWriter * writer)500 render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *writer)
501 {
502 int ok = 0;
503 PyObject *result = NULL;
504 PyObject *format_spec_object = NULL;
505 int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
506 int err;
507
508 /* If we know the type exactly, skip the lookup of __format__ and just
509 call the formatter directly. */
510 if (PyUnicode_CheckExact(fieldobj))
511 formatter = _PyUnicode_FormatAdvancedWriter;
512 else if (PyLong_CheckExact(fieldobj))
513 formatter = _PyLong_FormatAdvancedWriter;
514 else if (PyFloat_CheckExact(fieldobj))
515 formatter = _PyFloat_FormatAdvancedWriter;
516 else if (PyComplex_CheckExact(fieldobj))
517 formatter = _PyComplex_FormatAdvancedWriter;
518
519 if (formatter) {
520 /* we know exactly which formatter will be called when __format__ is
521 looked up, so call it directly, instead. */
522 err = formatter(writer, fieldobj, format_spec->str,
523 format_spec->start, format_spec->end);
524 return (err == 0);
525 }
526 else {
527 /* We need to create an object out of the pointers we have, because
528 __format__ takes a string/unicode object for format_spec. */
529 if (format_spec->str)
530 format_spec_object = PyUnicode_Substring(format_spec->str,
531 format_spec->start,
532 format_spec->end);
533 else
534 format_spec_object = PyUnicode_New(0, 0);
535 if (format_spec_object == NULL)
536 goto done;
537
538 result = PyObject_Format(fieldobj, format_spec_object);
539 }
540 if (result == NULL)
541 goto done;
542
543 if (_PyUnicodeWriter_WriteStr(writer, result) == -1)
544 goto done;
545 ok = 1;
546
547 done:
548 Py_XDECREF(format_spec_object);
549 Py_XDECREF(result);
550 return ok;
551 }
552
553 static int
parse_field(SubString * str,SubString * field_name,SubString * format_spec,int * format_spec_needs_expanding,Py_UCS4 * conversion)554 parse_field(SubString *str, SubString *field_name, SubString *format_spec,
555 int *format_spec_needs_expanding, Py_UCS4 *conversion)
556 {
557 /* Note this function works if the field name is zero length,
558 which is good. Zero length field names are handled later, in
559 field_name_split. */
560
561 Py_UCS4 c = 0;
562
563 /* initialize these, as they may be empty */
564 *conversion = '\0';
565 SubString_init(format_spec, NULL, 0, 0);
566
567 /* Search for the field name. it's terminated by the end of
568 the string, or a ':' or '!' */
569 field_name->str = str->str;
570 field_name->start = str->start;
571 while (str->start < str->end) {
572 switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
573 case '{':
574 PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name");
575 return 0;
576 case '[':
577 for (; str->start < str->end; str->start++)
578 if (PyUnicode_READ_CHAR(str->str, str->start) == ']')
579 break;
580 continue;
581 case '}':
582 case ':':
583 case '!':
584 break;
585 default:
586 continue;
587 }
588 break;
589 }
590
591 field_name->end = str->start - 1;
592 if (c == '!' || c == ':') {
593 Py_ssize_t count;
594 /* we have a format specifier and/or a conversion */
595 /* don't include the last character */
596
597 /* see if there's a conversion specifier */
598 if (c == '!') {
599 /* there must be another character present */
600 if (str->start >= str->end) {
601 PyErr_SetString(PyExc_ValueError,
602 "end of string while looking for conversion "
603 "specifier");
604 return 0;
605 }
606 *conversion = PyUnicode_READ_CHAR(str->str, str->start++);
607
608 if (str->start < str->end) {
609 c = PyUnicode_READ_CHAR(str->str, str->start++);
610 if (c == '}')
611 return 1;
612 if (c != ':') {
613 PyErr_SetString(PyExc_ValueError,
614 "expected ':' after conversion specifier");
615 return 0;
616 }
617 }
618 }
619 format_spec->str = str->str;
620 format_spec->start = str->start;
621 count = 1;
622 while (str->start < str->end) {
623 switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
624 case '{':
625 *format_spec_needs_expanding = 1;
626 count++;
627 break;
628 case '}':
629 count--;
630 if (count == 0) {
631 format_spec->end = str->start - 1;
632 return 1;
633 }
634 break;
635 default:
636 break;
637 }
638 }
639
640 PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec");
641 return 0;
642 }
643 else if (c != '}') {
644 PyErr_SetString(PyExc_ValueError, "expected '}' before end of string");
645 return 0;
646 }
647
648 return 1;
649 }
650
651 /************************************************************************/
652 /******* Output string allocation and escape-to-markup processing ******/
653 /************************************************************************/
654
655 /* MarkupIterator breaks the string into pieces of either literal
656 text, or things inside {} that need to be marked up. it is
657 designed to make it easy to wrap a Python iterator around it, for
658 use with the Formatter class */
659
660 typedef struct {
661 SubString str;
662 } MarkupIterator;
663
664 static int
MarkupIterator_init(MarkupIterator * self,PyObject * str,Py_ssize_t start,Py_ssize_t end)665 MarkupIterator_init(MarkupIterator *self, PyObject *str,
666 Py_ssize_t start, Py_ssize_t end)
667 {
668 SubString_init(&self->str, str, start, end);
669 return 1;
670 }
671
672 /* returns 0 on error, 1 on non-error termination, and 2 if it got a
673 string (or something to be expanded) */
674 static int
MarkupIterator_next(MarkupIterator * self,SubString * literal,int * field_present,SubString * field_name,SubString * format_spec,Py_UCS4 * conversion,int * format_spec_needs_expanding)675 MarkupIterator_next(MarkupIterator *self, SubString *literal,
676 int *field_present, SubString *field_name,
677 SubString *format_spec, Py_UCS4 *conversion,
678 int *format_spec_needs_expanding)
679 {
680 int at_end;
681 Py_UCS4 c = 0;
682 Py_ssize_t start;
683 Py_ssize_t len;
684 int markup_follows = 0;
685
686 /* initialize all of the output variables */
687 SubString_init(literal, NULL, 0, 0);
688 SubString_init(field_name, NULL, 0, 0);
689 SubString_init(format_spec, NULL, 0, 0);
690 *conversion = '\0';
691 *format_spec_needs_expanding = 0;
692 *field_present = 0;
693
694 /* No more input, end of iterator. This is the normal exit
695 path. */
696 if (self->str.start >= self->str.end)
697 return 1;
698
699 start = self->str.start;
700
701 /* First read any literal text. Read until the end of string, an
702 escaped '{' or '}', or an unescaped '{'. In order to never
703 allocate memory and so I can just pass pointers around, if
704 there's an escaped '{' or '}' then we'll return the literal
705 including the brace, but no format object. The next time
706 through, we'll return the rest of the literal, skipping past
707 the second consecutive brace. */
708 while (self->str.start < self->str.end) {
709 switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
710 case '{':
711 case '}':
712 markup_follows = 1;
713 break;
714 default:
715 continue;
716 }
717 break;
718 }
719
720 at_end = self->str.start >= self->str.end;
721 len = self->str.start - start;
722
723 if ((c == '}') && (at_end ||
724 (c != PyUnicode_READ_CHAR(self->str.str,
725 self->str.start)))) {
726 PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
727 "in format string");
728 return 0;
729 }
730 if (at_end && c == '{') {
731 PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
732 "in format string");
733 return 0;
734 }
735 if (!at_end) {
736 if (c == PyUnicode_READ_CHAR(self->str.str, self->str.start)) {
737 /* escaped } or {, skip it in the input. there is no
738 markup object following us, just this literal text */
739 self->str.start++;
740 markup_follows = 0;
741 }
742 else
743 len--;
744 }
745
746 /* record the literal text */
747 literal->str = self->str.str;
748 literal->start = start;
749 literal->end = start + len;
750
751 if (!markup_follows)
752 return 2;
753
754 /* this is markup; parse the field */
755 *field_present = 1;
756 if (!parse_field(&self->str, field_name, format_spec,
757 format_spec_needs_expanding, conversion))
758 return 0;
759 return 2;
760 }
761
762
763 /* do the !r or !s conversion on obj */
764 static PyObject *
do_conversion(PyObject * obj,Py_UCS4 conversion)765 do_conversion(PyObject *obj, Py_UCS4 conversion)
766 {
767 /* XXX in pre-3.0, do we need to convert this to unicode, since it
768 might have returned a string? */
769 switch (conversion) {
770 case 'r':
771 return PyObject_Repr(obj);
772 case 's':
773 return PyObject_Str(obj);
774 case 'a':
775 return PyObject_ASCII(obj);
776 default:
777 if (conversion > 32 && conversion < 127) {
778 /* It's the ASCII subrange; casting to char is safe
779 (assuming the execution character set is an ASCII
780 superset). */
781 PyErr_Format(PyExc_ValueError,
782 "Unknown conversion specifier %c",
783 (char)conversion);
784 } else
785 PyErr_Format(PyExc_ValueError,
786 "Unknown conversion specifier \\x%x",
787 (unsigned int)conversion);
788 return NULL;
789 }
790 }
791
792 /* given:
793
794 {field_name!conversion:format_spec}
795
796 compute the result and write it to output.
797 format_spec_needs_expanding is an optimization. if it's false,
798 just output the string directly, otherwise recursively expand the
799 format_spec string.
800
801 field_name is allowed to be zero length, in which case we
802 are doing auto field numbering.
803 */
804
805 static int
output_markup(SubString * field_name,SubString * format_spec,int format_spec_needs_expanding,Py_UCS4 conversion,_PyUnicodeWriter * writer,PyObject * args,PyObject * kwargs,int recursion_depth,AutoNumber * auto_number)806 output_markup(SubString *field_name, SubString *format_spec,
807 int format_spec_needs_expanding, Py_UCS4 conversion,
808 _PyUnicodeWriter *writer, PyObject *args, PyObject *kwargs,
809 int recursion_depth, AutoNumber *auto_number)
810 {
811 PyObject *tmp = NULL;
812 PyObject *fieldobj = NULL;
813 SubString expanded_format_spec;
814 SubString *actual_format_spec;
815 int result = 0;
816
817 /* convert field_name to an object */
818 fieldobj = get_field_object(field_name, args, kwargs, auto_number);
819 if (fieldobj == NULL)
820 goto done;
821
822 if (conversion != '\0') {
823 tmp = do_conversion(fieldobj, conversion);
824 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
825 goto done;
826
827 /* do the assignment, transferring ownership: fieldobj = tmp */
828 Py_DECREF(fieldobj);
829 fieldobj = tmp;
830 tmp = NULL;
831 }
832
833 /* if needed, recursively compute the format_spec */
834 if (format_spec_needs_expanding) {
835 tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
836 auto_number);
837 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
838 goto done;
839
840 /* note that in the case we're expanding the format string,
841 tmp must be kept around until after the call to
842 render_field. */
843 SubString_init(&expanded_format_spec, tmp, 0, PyUnicode_GET_LENGTH(tmp));
844 actual_format_spec = &expanded_format_spec;
845 }
846 else
847 actual_format_spec = format_spec;
848
849 if (render_field(fieldobj, actual_format_spec, writer) == 0)
850 goto done;
851
852 result = 1;
853
854 done:
855 Py_XDECREF(fieldobj);
856 Py_XDECREF(tmp);
857
858 return result;
859 }
860
861 /*
862 do_markup is the top-level loop for the format() method. It
863 searches through the format string for escapes to markup codes, and
864 calls other functions to move non-markup text to the output,
865 and to perform the markup to the output.
866 */
867 static int
do_markup(SubString * input,PyObject * args,PyObject * kwargs,_PyUnicodeWriter * writer,int recursion_depth,AutoNumber * auto_number)868 do_markup(SubString *input, PyObject *args, PyObject *kwargs,
869 _PyUnicodeWriter *writer, int recursion_depth, AutoNumber *auto_number)
870 {
871 MarkupIterator iter;
872 int format_spec_needs_expanding;
873 int result;
874 int field_present;
875 SubString literal;
876 SubString field_name;
877 SubString format_spec;
878 Py_UCS4 conversion;
879
880 MarkupIterator_init(&iter, input->str, input->start, input->end);
881 while ((result = MarkupIterator_next(&iter, &literal, &field_present,
882 &field_name, &format_spec,
883 &conversion,
884 &format_spec_needs_expanding)) == 2) {
885 if (literal.end != literal.start) {
886 if (!field_present && iter.str.start == iter.str.end)
887 writer->overallocate = 0;
888 if (_PyUnicodeWriter_WriteSubstring(writer, literal.str,
889 literal.start, literal.end) < 0)
890 return 0;
891 }
892
893 if (field_present) {
894 if (iter.str.start == iter.str.end)
895 writer->overallocate = 0;
896 if (!output_markup(&field_name, &format_spec,
897 format_spec_needs_expanding, conversion, writer,
898 args, kwargs, recursion_depth, auto_number))
899 return 0;
900 }
901 }
902 return result;
903 }
904
905
906 /*
907 build_string allocates the output string and then
908 calls do_markup to do the heavy lifting.
909 */
910 static PyObject *
build_string(SubString * input,PyObject * args,PyObject * kwargs,int recursion_depth,AutoNumber * auto_number)911 build_string(SubString *input, PyObject *args, PyObject *kwargs,
912 int recursion_depth, AutoNumber *auto_number)
913 {
914 _PyUnicodeWriter writer;
915
916 /* check the recursion level */
917 if (recursion_depth <= 0) {
918 PyErr_SetString(PyExc_ValueError,
919 "Max string recursion exceeded");
920 return NULL;
921 }
922
923 _PyUnicodeWriter_Init(&writer);
924 writer.overallocate = 1;
925 writer.min_length = PyUnicode_GET_LENGTH(input->str) + 100;
926
927 if (!do_markup(input, args, kwargs, &writer, recursion_depth,
928 auto_number)) {
929 _PyUnicodeWriter_Dealloc(&writer);
930 return NULL;
931 }
932
933 return _PyUnicodeWriter_Finish(&writer);
934 }
935
936 /************************************************************************/
937 /*********** main routine ***********************************************/
938 /************************************************************************/
939
940 /* this is the main entry point */
941 static PyObject *
do_string_format(PyObject * self,PyObject * args,PyObject * kwargs)942 do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
943 {
944 SubString input;
945
946 /* PEP 3101 says only 2 levels, so that
947 "{0:{1}}".format('abc', 's') # works
948 "{0:{1:{2}}}".format('abc', 's', '') # fails
949 */
950 int recursion_depth = 2;
951
952 AutoNumber auto_number;
953
954 if (PyUnicode_READY(self) == -1)
955 return NULL;
956
957 AutoNumber_Init(&auto_number);
958 SubString_init(&input, self, 0, PyUnicode_GET_LENGTH(self));
959 return build_string(&input, args, kwargs, recursion_depth, &auto_number);
960 }
961
962 static PyObject *
do_string_format_map(PyObject * self,PyObject * obj)963 do_string_format_map(PyObject *self, PyObject *obj)
964 {
965 return do_string_format(self, NULL, obj);
966 }
967
968
969 /************************************************************************/
970 /*********** formatteriterator ******************************************/
971 /************************************************************************/
972
973 /* This is used to implement string.Formatter.vparse(). It exists so
974 Formatter can share code with the built in unicode.format() method.
975 It's really just a wrapper around MarkupIterator that is callable
976 from Python. */
977
978 typedef struct {
979 PyObject_HEAD
980 PyObject *str;
981 MarkupIterator it_markup;
982 } formatteriterobject;
983
984 static void
formatteriter_dealloc(formatteriterobject * it)985 formatteriter_dealloc(formatteriterobject *it)
986 {
987 Py_XDECREF(it->str);
988 PyObject_Free(it);
989 }
990
991 /* returns a tuple:
992 (literal, field_name, format_spec, conversion)
993
994 literal is any literal text to output. might be zero length
995 field_name is the string before the ':'. might be None
996 format_spec is the string after the ':'. mibht be None
997 conversion is either None, or the string after the '!'
998 */
999 static PyObject *
formatteriter_next(formatteriterobject * it)1000 formatteriter_next(formatteriterobject *it)
1001 {
1002 SubString literal;
1003 SubString field_name;
1004 SubString format_spec;
1005 Py_UCS4 conversion;
1006 int format_spec_needs_expanding;
1007 int field_present;
1008 int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
1009 &field_name, &format_spec, &conversion,
1010 &format_spec_needs_expanding);
1011
1012 /* all of the SubString objects point into it->str, so no
1013 memory management needs to be done on them */
1014 assert(0 <= result && result <= 2);
1015 if (result == 0 || result == 1)
1016 /* if 0, error has already been set, if 1, iterator is empty */
1017 return NULL;
1018 else {
1019 PyObject *literal_str = NULL;
1020 PyObject *field_name_str = NULL;
1021 PyObject *format_spec_str = NULL;
1022 PyObject *conversion_str = NULL;
1023 PyObject *tuple = NULL;
1024
1025 literal_str = SubString_new_object(&literal);
1026 if (literal_str == NULL)
1027 goto done;
1028
1029 field_name_str = SubString_new_object(&field_name);
1030 if (field_name_str == NULL)
1031 goto done;
1032
1033 /* if field_name is non-zero length, return a string for
1034 format_spec (even if zero length), else return None */
1035 format_spec_str = (field_present ?
1036 SubString_new_object_or_empty :
1037 SubString_new_object)(&format_spec);
1038 if (format_spec_str == NULL)
1039 goto done;
1040
1041 /* if the conversion is not specified, return a None,
1042 otherwise create a one length string with the conversion
1043 character */
1044 if (conversion == '\0') {
1045 conversion_str = Py_None;
1046 Py_INCREF(conversion_str);
1047 }
1048 else
1049 conversion_str = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1050 &conversion, 1);
1051 if (conversion_str == NULL)
1052 goto done;
1053
1054 tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
1055 conversion_str);
1056 done:
1057 Py_XDECREF(literal_str);
1058 Py_XDECREF(field_name_str);
1059 Py_XDECREF(format_spec_str);
1060 Py_XDECREF(conversion_str);
1061 return tuple;
1062 }
1063 }
1064
1065 static PyMethodDef formatteriter_methods[] = {
1066 {NULL, NULL} /* sentinel */
1067 };
1068
1069 static PyTypeObject PyFormatterIter_Type = {
1070 PyVarObject_HEAD_INIT(&PyType_Type, 0)
1071 "formatteriterator", /* tp_name */
1072 sizeof(formatteriterobject), /* tp_basicsize */
1073 0, /* tp_itemsize */
1074 /* methods */
1075 (destructor)formatteriter_dealloc, /* tp_dealloc */
1076 0, /* tp_vectorcall_offset */
1077 0, /* tp_getattr */
1078 0, /* tp_setattr */
1079 0, /* tp_as_async */
1080 0, /* tp_repr */
1081 0, /* tp_as_number */
1082 0, /* tp_as_sequence */
1083 0, /* tp_as_mapping */
1084 0, /* tp_hash */
1085 0, /* tp_call */
1086 0, /* tp_str */
1087 PyObject_GenericGetAttr, /* tp_getattro */
1088 0, /* tp_setattro */
1089 0, /* tp_as_buffer */
1090 Py_TPFLAGS_DEFAULT, /* tp_flags */
1091 0, /* tp_doc */
1092 0, /* tp_traverse */
1093 0, /* tp_clear */
1094 0, /* tp_richcompare */
1095 0, /* tp_weaklistoffset */
1096 PyObject_SelfIter, /* tp_iter */
1097 (iternextfunc)formatteriter_next, /* tp_iternext */
1098 formatteriter_methods, /* tp_methods */
1099 0,
1100 };
1101
1102 /* unicode_formatter_parser is used to implement
1103 string.Formatter.vformat. it parses a string and returns tuples
1104 describing the parsed elements. It's a wrapper around
1105 stringlib/string_format.h's MarkupIterator */
1106 static PyObject *
formatter_parser(PyObject * ignored,PyObject * self)1107 formatter_parser(PyObject *ignored, PyObject *self)
1108 {
1109 formatteriterobject *it;
1110
1111 if (!PyUnicode_Check(self)) {
1112 PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1113 return NULL;
1114 }
1115
1116 if (PyUnicode_READY(self) == -1)
1117 return NULL;
1118
1119 it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1120 if (it == NULL)
1121 return NULL;
1122
1123 /* take ownership, give the object to the iterator */
1124 Py_INCREF(self);
1125 it->str = self;
1126
1127 /* initialize the contained MarkupIterator */
1128 MarkupIterator_init(&it->it_markup, (PyObject*)self, 0, PyUnicode_GET_LENGTH(self));
1129 return (PyObject *)it;
1130 }
1131
1132
1133 /************************************************************************/
1134 /*********** fieldnameiterator ******************************************/
1135 /************************************************************************/
1136
1137
/* This is used to implement string.Formatter.get_field().  It parses the
   field name into attribute and item values.  It's a Python-callable
   wrapper around FieldNameIterator. */
1141
1142 typedef struct {
1143 PyObject_HEAD
1144 PyObject *str;
1145 FieldNameIterator it_field;
1146 } fieldnameiterobject;
1147
1148 static void
fieldnameiter_dealloc(fieldnameiterobject * it)1149 fieldnameiter_dealloc(fieldnameiterobject *it)
1150 {
1151 Py_XDECREF(it->str);
1152 PyObject_Free(it);
1153 }
1154
1155 /* returns a tuple:
1156 (is_attr, value)
1157 is_attr is true if we used attribute syntax (e.g., '.foo')
1158 false if we used index syntax (e.g., '[foo]')
1159 value is an integer or string
1160 */
1161 static PyObject *
fieldnameiter_next(fieldnameiterobject * it)1162 fieldnameiter_next(fieldnameiterobject *it)
1163 {
1164 int result;
1165 int is_attr;
1166 Py_ssize_t idx;
1167 SubString name;
1168
1169 result = FieldNameIterator_next(&it->it_field, &is_attr,
1170 &idx, &name);
1171 if (result == 0 || result == 1)
1172 /* if 0, error has already been set, if 1, iterator is empty */
1173 return NULL;
1174 else {
1175 PyObject* result = NULL;
1176 PyObject* is_attr_obj = NULL;
1177 PyObject* obj = NULL;
1178
1179 is_attr_obj = PyBool_FromLong(is_attr);
1180 if (is_attr_obj == NULL)
1181 goto done;
1182
1183 /* either an integer or a string */
1184 if (idx != -1)
1185 obj = PyLong_FromSsize_t(idx);
1186 else
1187 obj = SubString_new_object(&name);
1188 if (obj == NULL)
1189 goto done;
1190
1191 /* return a tuple of values */
1192 result = PyTuple_Pack(2, is_attr_obj, obj);
1193
1194 done:
1195 Py_XDECREF(is_attr_obj);
1196 Py_XDECREF(obj);
1197 return result;
1198 }
1199 }
1200
1201 static PyMethodDef fieldnameiter_methods[] = {
1202 {NULL, NULL} /* sentinel */
1203 };
1204
1205 static PyTypeObject PyFieldNameIter_Type = {
1206 PyVarObject_HEAD_INIT(&PyType_Type, 0)
1207 "fieldnameiterator", /* tp_name */
1208 sizeof(fieldnameiterobject), /* tp_basicsize */
1209 0, /* tp_itemsize */
1210 /* methods */
1211 (destructor)fieldnameiter_dealloc, /* tp_dealloc */
1212 0, /* tp_vectorcall_offset */
1213 0, /* tp_getattr */
1214 0, /* tp_setattr */
1215 0, /* tp_as_async */
1216 0, /* tp_repr */
1217 0, /* tp_as_number */
1218 0, /* tp_as_sequence */
1219 0, /* tp_as_mapping */
1220 0, /* tp_hash */
1221 0, /* tp_call */
1222 0, /* tp_str */
1223 PyObject_GenericGetAttr, /* tp_getattro */
1224 0, /* tp_setattro */
1225 0, /* tp_as_buffer */
1226 Py_TPFLAGS_DEFAULT, /* tp_flags */
1227 0, /* tp_doc */
1228 0, /* tp_traverse */
1229 0, /* tp_clear */
1230 0, /* tp_richcompare */
1231 0, /* tp_weaklistoffset */
1232 PyObject_SelfIter, /* tp_iter */
1233 (iternextfunc)fieldnameiter_next, /* tp_iternext */
1234 fieldnameiter_methods, /* tp_methods */
1235 0};
1236
1237 /* unicode_formatter_field_name_split is used to implement
1238 string.Formatter.vformat. it takes a PEP 3101 "field name", and
1239 returns a tuple of (first, rest): "first", the part before the
1240 first '.' or '['; and "rest", an iterator for the rest of the field
1241 name. it's a wrapper around stringlib/string_format.h's
1242 field_name_split. The iterator it returns is a
1243 FieldNameIterator */
1244 static PyObject *
formatter_field_name_split(PyObject * ignored,PyObject * self)1245 formatter_field_name_split(PyObject *ignored, PyObject *self)
1246 {
1247 SubString first;
1248 Py_ssize_t first_idx;
1249 fieldnameiterobject *it;
1250
1251 PyObject *first_obj = NULL;
1252 PyObject *result = NULL;
1253
1254 if (!PyUnicode_Check(self)) {
1255 PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1256 return NULL;
1257 }
1258
1259 if (PyUnicode_READY(self) == -1)
1260 return NULL;
1261
1262 it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1263 if (it == NULL)
1264 return NULL;
1265
1266 /* take ownership, give the object to the iterator. this is
1267 just to keep the field_name alive */
1268 Py_INCREF(self);
1269 it->str = self;
1270
1271 /* Pass in auto_number = NULL. We'll return an empty string for
1272 first_obj in that case. */
1273 if (!field_name_split((PyObject*)self, 0, PyUnicode_GET_LENGTH(self),
1274 &first, &first_idx, &it->it_field, NULL))
1275 goto done;
1276
1277 /* first becomes an integer, if possible; else a string */
1278 if (first_idx != -1)
1279 first_obj = PyLong_FromSsize_t(first_idx);
1280 else
1281 /* convert "first" into a string object */
1282 first_obj = SubString_new_object(&first);
1283 if (first_obj == NULL)
1284 goto done;
1285
1286 /* return a tuple of values */
1287 result = PyTuple_Pack(2, first_obj, it);
1288
1289 done:
1290 Py_XDECREF(it);
1291 Py_XDECREF(first_obj);
1292 return result;
1293 }
1294