1 /* bytes object implementation */
2 
3 #define PY_SSIZE_T_CLEAN
4 
5 #include "Python.h"
6 #include "internal/mem.h"
7 #include "internal/pystate.h"
8 
9 #include "bytes_methods.h"
10 #include "pystrhex.h"
11 #include <stddef.h>
12 
13 /*[clinic input]
14 class bytes "PyBytesObject *" "&PyBytes_Type"
15 [clinic start generated code]*/
16 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
17 
18 #include "clinic/bytesobject.c.h"
19 
#ifdef COUNT_ALLOCS
/* Allocation statistics: bumped every time the cached empty bytes object
   (null_strings) or a cached one-byte bytes object (one_strings) is handed
   out instead of allocating a new object. */
Py_ssize_t null_strings, one_strings;
#endif

/* Cache of one-byte bytes objects, indexed by the byte value.  An entry is
   NULL until the first bytes object with that single byte has been created;
   the cache holds its own reference to each stored object. */
static PyBytesObject *characters[UCHAR_MAX + 1];
/* Cached empty bytes object, or NULL if none has been created yet.  The
   cache holds its own reference to it. */
static PyBytesObject *nullstring;
26 
27 /* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
28    for a string of length n should request PyBytesObject_SIZE + n bytes.
29 
30    Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
31    3 bytes per string allocation on a typical system.
32 */
33 #define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
34 
35 /* Forward declaration */
36 Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
37                                                    char *str);
38 
39 /*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string; its length, as computed by strlen(), is the size of the result.
42 
43    For PyBytes_FromStringAndSize(), the parameter `str' is
44    either NULL or else points to a string containing at least `size' bytes.
45    For PyBytes_FromStringAndSize(), the string in the `str' parameter does
46    not have to be null-terminated.  (Therefore it is safe to construct a
47    substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
48    If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
49    bytes (setting the last byte to the null terminating character) and you can
50    fill in the data yourself.  If `str' is non-NULL then the resulting
51    PyBytes object must be treated as immutable and you must not fill in nor
52    alter the data yourself, since the strings may be shared.
53 
54    The PyObject member `op->ob_size', which denotes the number of "extra
55    items" in a variable-size object, will contain the number of bytes
56    allocated for string data, not counting the null terminating character.
57    It is therefore equal to the `size' parameter (for
58    PyBytes_FromStringAndSize()) or the length of the string in the `str'
59    parameter (for PyBytes_FromString()).
60 */
61 static PyObject *
_PyBytes_FromSize(Py_ssize_t size,int use_calloc)62 _PyBytes_FromSize(Py_ssize_t size, int use_calloc)
63 {
64     PyBytesObject *op;
65     assert(size >= 0);
66 
67     if (size == 0 && (op = nullstring) != NULL) {
68 #ifdef COUNT_ALLOCS
69         null_strings++;
70 #endif
71         Py_INCREF(op);
72         return (PyObject *)op;
73     }
74 
75     if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
76         PyErr_SetString(PyExc_OverflowError,
77                         "byte string is too large");
78         return NULL;
79     }
80 
81     /* Inline PyObject_NewVar */
82     if (use_calloc)
83         op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
84     else
85         op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
86     if (op == NULL)
87         return PyErr_NoMemory();
88     (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
89     op->ob_shash = -1;
90     if (!use_calloc)
91         op->ob_sval[size] = '\0';
92     /* empty byte string singleton */
93     if (size == 0) {
94         nullstring = op;
95         Py_INCREF(op);
96     }
97     return (PyObject *) op;
98 }
99 
100 PyObject *
PyBytes_FromStringAndSize(const char * str,Py_ssize_t size)101 PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
102 {
103     PyBytesObject *op;
104     if (size < 0) {
105         PyErr_SetString(PyExc_SystemError,
106             "Negative size passed to PyBytes_FromStringAndSize");
107         return NULL;
108     }
109     if (size == 1 && str != NULL &&
110         (op = characters[*str & UCHAR_MAX]) != NULL)
111     {
112 #ifdef COUNT_ALLOCS
113         one_strings++;
114 #endif
115         Py_INCREF(op);
116         return (PyObject *)op;
117     }
118 
119     op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
120     if (op == NULL)
121         return NULL;
122     if (str == NULL)
123         return (PyObject *) op;
124 
125     memcpy(op->ob_sval, str, size);
126     /* share short strings */
127     if (size == 1) {
128         characters[*str & UCHAR_MAX] = op;
129         Py_INCREF(op);
130     }
131     return (PyObject *) op;
132 }
133 
134 PyObject *
PyBytes_FromString(const char * str)135 PyBytes_FromString(const char *str)
136 {
137     size_t size;
138     PyBytesObject *op;
139 
140     assert(str != NULL);
141     size = strlen(str);
142     if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
143         PyErr_SetString(PyExc_OverflowError,
144             "byte string is too long");
145         return NULL;
146     }
147     if (size == 0 && (op = nullstring) != NULL) {
148 #ifdef COUNT_ALLOCS
149         null_strings++;
150 #endif
151         Py_INCREF(op);
152         return (PyObject *)op;
153     }
154     if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
155 #ifdef COUNT_ALLOCS
156         one_strings++;
157 #endif
158         Py_INCREF(op);
159         return (PyObject *)op;
160     }
161 
162     /* Inline PyObject_NewVar */
163     op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
164     if (op == NULL)
165         return PyErr_NoMemory();
166     (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
167     op->ob_shash = -1;
168     memcpy(op->ob_sval, str, size+1);
169     /* share short strings */
170     if (size == 0) {
171         nullstring = op;
172         Py_INCREF(op);
173     } else if (size == 1) {
174         characters[*str & UCHAR_MAX] = op;
175         Py_INCREF(op);
176     }
177     return (PyObject *) op;
178 }
179 
180 PyObject *
PyBytes_FromFormatV(const char * format,va_list vargs)181 PyBytes_FromFormatV(const char *format, va_list vargs)
182 {
183     char *s;
184     const char *f;
185     const char *p;
186     Py_ssize_t prec;
187     int longflag;
188     int size_tflag;
189     /* Longest 64-bit formatted numbers:
190        - "18446744073709551615\0" (21 bytes)
191        - "-9223372036854775808\0" (21 bytes)
192        Decimal takes the most space (it isn't enough for octal.)
193 
194        Longest 64-bit pointer representation:
195        "0xffffffffffffffff\0" (19 bytes). */
196     char buffer[21];
197     _PyBytesWriter writer;
198 
199     _PyBytesWriter_Init(&writer);
200 
201     s = _PyBytesWriter_Alloc(&writer, strlen(format));
202     if (s == NULL)
203         return NULL;
204     writer.overallocate = 1;
205 
206 #define WRITE_BYTES(str) \
207     do { \
208         s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
209         if (s == NULL) \
210             goto error; \
211     } while (0)
212 
213     for (f = format; *f; f++) {
214         if (*f != '%') {
215             *s++ = *f;
216             continue;
217         }
218 
219         p = f++;
220 
221         /* ignore the width (ex: 10 in "%10s") */
222         while (Py_ISDIGIT(*f))
223             f++;
224 
225         /* parse the precision (ex: 10 in "%.10s") */
226         prec = 0;
227         if (*f == '.') {
228             f++;
229             for (; Py_ISDIGIT(*f); f++) {
230                 prec = (prec * 10) + (*f - '0');
231             }
232         }
233 
234         while (*f && *f != '%' && !Py_ISALPHA(*f))
235             f++;
236 
237         /* handle the long flag ('l'), but only for %ld and %lu.
238            others can be added when necessary. */
239         longflag = 0;
240         if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
241             longflag = 1;
242             ++f;
243         }
244 
245         /* handle the size_t flag ('z'). */
246         size_tflag = 0;
247         if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
248             size_tflag = 1;
249             ++f;
250         }
251 
252         /* subtract bytes preallocated for the format string
253            (ex: 2 for "%s") */
254         writer.min_size -= (f - p + 1);
255 
256         switch (*f) {
257         case 'c':
258         {
259             int c = va_arg(vargs, int);
260             if (c < 0 || c > 255) {
261                 PyErr_SetString(PyExc_OverflowError,
262                                 "PyBytes_FromFormatV(): %c format "
263                                 "expects an integer in range [0; 255]");
264                 goto error;
265             }
266             writer.min_size++;
267             *s++ = (unsigned char)c;
268             break;
269         }
270 
271         case 'd':
272             if (longflag)
273                 sprintf(buffer, "%ld", va_arg(vargs, long));
274             else if (size_tflag)
275                 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
276                     va_arg(vargs, Py_ssize_t));
277             else
278                 sprintf(buffer, "%d", va_arg(vargs, int));
279             assert(strlen(buffer) < sizeof(buffer));
280             WRITE_BYTES(buffer);
281             break;
282 
283         case 'u':
284             if (longflag)
285                 sprintf(buffer, "%lu",
286                     va_arg(vargs, unsigned long));
287             else if (size_tflag)
288                 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
289                     va_arg(vargs, size_t));
290             else
291                 sprintf(buffer, "%u",
292                     va_arg(vargs, unsigned int));
293             assert(strlen(buffer) < sizeof(buffer));
294             WRITE_BYTES(buffer);
295             break;
296 
297         case 'i':
298             sprintf(buffer, "%i", va_arg(vargs, int));
299             assert(strlen(buffer) < sizeof(buffer));
300             WRITE_BYTES(buffer);
301             break;
302 
303         case 'x':
304             sprintf(buffer, "%x", va_arg(vargs, int));
305             assert(strlen(buffer) < sizeof(buffer));
306             WRITE_BYTES(buffer);
307             break;
308 
309         case 's':
310         {
311             Py_ssize_t i;
312 
313             p = va_arg(vargs, const char*);
314             i = strlen(p);
315             if (prec > 0 && i > prec)
316                 i = prec;
317             s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
318             if (s == NULL)
319                 goto error;
320             break;
321         }
322 
323         case 'p':
324             sprintf(buffer, "%p", va_arg(vargs, void*));
325             assert(strlen(buffer) < sizeof(buffer));
326             /* %p is ill-defined:  ensure leading 0x. */
327             if (buffer[1] == 'X')
328                 buffer[1] = 'x';
329             else if (buffer[1] != 'x') {
330                 memmove(buffer+2, buffer, strlen(buffer)+1);
331                 buffer[0] = '0';
332                 buffer[1] = 'x';
333             }
334             WRITE_BYTES(buffer);
335             break;
336 
337         case '%':
338             writer.min_size++;
339             *s++ = '%';
340             break;
341 
342         default:
343             if (*f == 0) {
344                 /* fix min_size if we reached the end of the format string */
345                 writer.min_size++;
346             }
347 
348             /* invalid format string: copy unformatted string and exit */
349             WRITE_BYTES(p);
350             return _PyBytesWriter_Finish(&writer, s);
351         }
352     }
353 
354 #undef WRITE_BYTES
355 
356     return _PyBytesWriter_Finish(&writer, s);
357 
358  error:
359     _PyBytesWriter_Dealloc(&writer);
360     return NULL;
361 }
362 
363 PyObject *
PyBytes_FromFormat(const char * format,...)364 PyBytes_FromFormat(const char *format, ...)
365 {
366     PyObject* ret;
367     va_list vargs;
368 
369 #ifdef HAVE_STDARG_PROTOTYPES
370     va_start(vargs, format);
371 #else
372     va_start(vargs);
373 #endif
374     ret = PyBytes_FromFormatV(format, vargs);
375     va_end(vargs);
376     return ret;
377 }
378 
379 /* Helpers for formatstring */
380 
381 Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject * args,Py_ssize_t arglen,Py_ssize_t * p_argidx)382 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
383 {
384     Py_ssize_t argidx = *p_argidx;
385     if (argidx < arglen) {
386         (*p_argidx)++;
387         if (arglen < 0)
388             return args;
389         else
390             return PyTuple_GetItem(args, argidx);
391     }
392     PyErr_SetString(PyExc_TypeError,
393                     "not enough arguments for format string");
394     return NULL;
395 }
396 
/* Format codes
 *
 * Bit flags collected while parsing the flag characters of a conversion
 * specifier in _PyBytes_FormatEx(); each bit records that the flag
 * character shown next to it was present.
 *
 * F_LJUST      '-'
 * F_SIGN       '+'
 * F_BLANK      ' '
 * F_ALT        '#'
 * F_ZERO       '0'
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
409 
410 /* Returns a new reference to a PyBytes object, or NULL on failure. */
411 
412 static char*
formatfloat(PyObject * v,int flags,int prec,int type,PyObject ** p_result,_PyBytesWriter * writer,char * str)413 formatfloat(PyObject *v, int flags, int prec, int type,
414             PyObject **p_result, _PyBytesWriter *writer, char *str)
415 {
416     char *p;
417     PyObject *result;
418     double x;
419     size_t len;
420 
421     x = PyFloat_AsDouble(v);
422     if (x == -1.0 && PyErr_Occurred()) {
423         PyErr_Format(PyExc_TypeError, "float argument required, "
424                      "not %.200s", Py_TYPE(v)->tp_name);
425         return NULL;
426     }
427 
428     if (prec < 0)
429         prec = 6;
430 
431     p = PyOS_double_to_string(x, type, prec,
432                               (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
433 
434     if (p == NULL)
435         return NULL;
436 
437     len = strlen(p);
438     if (writer != NULL) {
439         str = _PyBytesWriter_Prepare(writer, str, len);
440         if (str == NULL)
441             return NULL;
442         memcpy(str, p, len);
443         PyMem_Free(p);
444         str += len;
445         return str;
446     }
447 
448     result = PyBytes_FromStringAndSize(p, len);
449     PyMem_Free(p);
450     *p_result = result;
451     return str;
452 }
453 
454 static PyObject *
formatlong(PyObject * v,int flags,int prec,int type)455 formatlong(PyObject *v, int flags, int prec, int type)
456 {
457     PyObject *result, *iobj;
458     if (type == 'i')
459         type = 'd';
460     if (PyLong_Check(v))
461         return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
462     if (PyNumber_Check(v)) {
463         /* make sure number is a type of integer for o, x, and X */
464         if (type == 'o' || type == 'x' || type == 'X')
465             iobj = PyNumber_Index(v);
466         else
467             iobj = PyNumber_Long(v);
468         if (iobj == NULL) {
469             if (!PyErr_ExceptionMatches(PyExc_TypeError))
470                 return NULL;
471         }
472         else if (!PyLong_Check(iobj))
473             Py_CLEAR(iobj);
474         if (iobj != NULL) {
475             result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
476             Py_DECREF(iobj);
477             return result;
478         }
479     }
480     PyErr_Format(PyExc_TypeError,
481         "%%%c format: %s is required, not %.200s", type,
482         (type == 'o' || type == 'x' || type == 'X') ? "an integer"
483                                                     : "a number",
484         Py_TYPE(v)->tp_name);
485     return NULL;
486 }
487 
488 static int
byte_converter(PyObject * arg,char * p)489 byte_converter(PyObject *arg, char *p)
490 {
491     if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
492         *p = PyBytes_AS_STRING(arg)[0];
493         return 1;
494     }
495     else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
496         *p = PyByteArray_AS_STRING(arg)[0];
497         return 1;
498     }
499     else {
500         PyObject *iobj;
501         long ival;
502         int overflow;
503         /* make sure number is a type of integer */
504         if (PyLong_Check(arg)) {
505             ival = PyLong_AsLongAndOverflow(arg, &overflow);
506         }
507         else {
508             iobj = PyNumber_Index(arg);
509             if (iobj == NULL) {
510                 if (!PyErr_ExceptionMatches(PyExc_TypeError))
511                     return 0;
512                 goto onError;
513             }
514             ival = PyLong_AsLongAndOverflow(iobj, &overflow);
515             Py_DECREF(iobj);
516         }
517         if (!overflow && ival == -1 && PyErr_Occurred())
518             goto onError;
519         if (overflow || !(0 <= ival && ival <= 255)) {
520             PyErr_SetString(PyExc_OverflowError,
521                             "%c arg not in range(256)");
522             return 0;
523         }
524         *p = (char)ival;
525         return 1;
526     }
527   onError:
528     PyErr_SetString(PyExc_TypeError,
529         "%c requires an integer in range(256) or a single byte");
530     return 0;
531 }
532 
533 static PyObject *_PyBytes_FromBuffer(PyObject *x);
534 
535 static PyObject *
format_obj(PyObject * v,const char ** pbuf,Py_ssize_t * plen)536 format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
537 {
538     PyObject *func, *result;
539     _Py_IDENTIFIER(__bytes__);
540     /* is it a bytes object? */
541     if (PyBytes_Check(v)) {
542         *pbuf = PyBytes_AS_STRING(v);
543         *plen = PyBytes_GET_SIZE(v);
544         Py_INCREF(v);
545         return v;
546     }
547     if (PyByteArray_Check(v)) {
548         *pbuf = PyByteArray_AS_STRING(v);
549         *plen = PyByteArray_GET_SIZE(v);
550         Py_INCREF(v);
551         return v;
552     }
553     /* does it support __bytes__? */
554     func = _PyObject_LookupSpecial(v, &PyId___bytes__);
555     if (func != NULL) {
556         result = _PyObject_CallNoArg(func);
557         Py_DECREF(func);
558         if (result == NULL)
559             return NULL;
560         if (!PyBytes_Check(result)) {
561             PyErr_Format(PyExc_TypeError,
562                          "__bytes__ returned non-bytes (type %.200s)",
563                          Py_TYPE(result)->tp_name);
564             Py_DECREF(result);
565             return NULL;
566         }
567         *pbuf = PyBytes_AS_STRING(result);
568         *plen = PyBytes_GET_SIZE(result);
569         return result;
570     }
571     /* does it support buffer protocol? */
572     if (PyObject_CheckBuffer(v)) {
573         /* maybe we can avoid making a copy of the buffer object here? */
574         result = _PyBytes_FromBuffer(v);
575         if (result == NULL)
576             return NULL;
577         *pbuf = PyBytes_AS_STRING(result);
578         *plen = PyBytes_GET_SIZE(result);
579         return result;
580     }
581     PyErr_Format(PyExc_TypeError,
582                  "%%b requires a bytes-like object, "
583                  "or an object that implements __bytes__, not '%.100s'",
584                  Py_TYPE(v)->tp_name);
585     return NULL;
586 }
587 
588 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
589 
590 PyObject *
_PyBytes_FormatEx(const char * format,Py_ssize_t format_len,PyObject * args,int use_bytearray)591 _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
592                   PyObject *args, int use_bytearray)
593 {
594     const char *fmt;
595     char *res;
596     Py_ssize_t arglen, argidx;
597     Py_ssize_t fmtcnt;
598     int args_owned = 0;
599     PyObject *dict = NULL;
600     _PyBytesWriter writer;
601 
602     if (args == NULL) {
603         PyErr_BadInternalCall();
604         return NULL;
605     }
606     fmt = format;
607     fmtcnt = format_len;
608 
609     _PyBytesWriter_Init(&writer);
610     writer.use_bytearray = use_bytearray;
611 
612     res = _PyBytesWriter_Alloc(&writer, fmtcnt);
613     if (res == NULL)
614         return NULL;
615     if (!use_bytearray)
616         writer.overallocate = 1;
617 
618     if (PyTuple_Check(args)) {
619         arglen = PyTuple_GET_SIZE(args);
620         argidx = 0;
621     }
622     else {
623         arglen = -1;
624         argidx = -2;
625     }
626     if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
627         !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
628         !PyByteArray_Check(args)) {
629             dict = args;
630     }
631 
632     while (--fmtcnt >= 0) {
633         if (*fmt != '%') {
634             Py_ssize_t len;
635             char *pos;
636 
637             pos = (char *)memchr(fmt + 1, '%', fmtcnt);
638             if (pos != NULL)
639                 len = pos - fmt;
640             else
641                 len = fmtcnt + 1;
642             assert(len != 0);
643 
644             memcpy(res, fmt, len);
645             res += len;
646             fmt += len;
647             fmtcnt -= (len - 1);
648         }
649         else {
650             /* Got a format specifier */
651             int flags = 0;
652             Py_ssize_t width = -1;
653             int prec = -1;
654             int c = '\0';
655             int fill;
656             PyObject *v = NULL;
657             PyObject *temp = NULL;
658             const char *pbuf = NULL;
659             int sign;
660             Py_ssize_t len = 0;
661             char onechar; /* For byte_converter() */
662             Py_ssize_t alloc;
663 #ifdef Py_DEBUG
664             char *before;
665 #endif
666 
667             fmt++;
668             if (*fmt == '%') {
669                 *res++ = '%';
670                 fmt++;
671                 fmtcnt--;
672                 continue;
673             }
674             if (*fmt == '(') {
675                 const char *keystart;
676                 Py_ssize_t keylen;
677                 PyObject *key;
678                 int pcount = 1;
679 
680                 if (dict == NULL) {
681                     PyErr_SetString(PyExc_TypeError,
682                              "format requires a mapping");
683                     goto error;
684                 }
685                 ++fmt;
686                 --fmtcnt;
687                 keystart = fmt;
688                 /* Skip over balanced parentheses */
689                 while (pcount > 0 && --fmtcnt >= 0) {
690                     if (*fmt == ')')
691                         --pcount;
692                     else if (*fmt == '(')
693                         ++pcount;
694                     fmt++;
695                 }
696                 keylen = fmt - keystart - 1;
697                 if (fmtcnt < 0 || pcount > 0) {
698                     PyErr_SetString(PyExc_ValueError,
699                                "incomplete format key");
700                     goto error;
701                 }
702                 key = PyBytes_FromStringAndSize(keystart,
703                                                  keylen);
704                 if (key == NULL)
705                     goto error;
706                 if (args_owned) {
707                     Py_DECREF(args);
708                     args_owned = 0;
709                 }
710                 args = PyObject_GetItem(dict, key);
711                 Py_DECREF(key);
712                 if (args == NULL) {
713                     goto error;
714                 }
715                 args_owned = 1;
716                 arglen = -1;
717                 argidx = -2;
718             }
719 
720             /* Parse flags. Example: "%+i" => flags=F_SIGN. */
721             while (--fmtcnt >= 0) {
722                 switch (c = *fmt++) {
723                 case '-': flags |= F_LJUST; continue;
724                 case '+': flags |= F_SIGN; continue;
725                 case ' ': flags |= F_BLANK; continue;
726                 case '#': flags |= F_ALT; continue;
727                 case '0': flags |= F_ZERO; continue;
728                 }
729                 break;
730             }
731 
732             /* Parse width. Example: "%10s" => width=10 */
733             if (c == '*') {
734                 v = getnextarg(args, arglen, &argidx);
735                 if (v == NULL)
736                     goto error;
737                 if (!PyLong_Check(v)) {
738                     PyErr_SetString(PyExc_TypeError,
739                                     "* wants int");
740                     goto error;
741                 }
742                 width = PyLong_AsSsize_t(v);
743                 if (width == -1 && PyErr_Occurred())
744                     goto error;
745                 if (width < 0) {
746                     flags |= F_LJUST;
747                     width = -width;
748                 }
749                 if (--fmtcnt >= 0)
750                     c = *fmt++;
751             }
752             else if (c >= 0 && isdigit(c)) {
753                 width = c - '0';
754                 while (--fmtcnt >= 0) {
755                     c = Py_CHARMASK(*fmt++);
756                     if (!isdigit(c))
757                         break;
758                     if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
759                         PyErr_SetString(
760                             PyExc_ValueError,
761                             "width too big");
762                         goto error;
763                     }
764                     width = width*10 + (c - '0');
765                 }
766             }
767 
768             /* Parse precision. Example: "%.3f" => prec=3 */
769             if (c == '.') {
770                 prec = 0;
771                 if (--fmtcnt >= 0)
772                     c = *fmt++;
773                 if (c == '*') {
774                     v = getnextarg(args, arglen, &argidx);
775                     if (v == NULL)
776                         goto error;
777                     if (!PyLong_Check(v)) {
778                         PyErr_SetString(
779                             PyExc_TypeError,
780                             "* wants int");
781                         goto error;
782                     }
783                     prec = _PyLong_AsInt(v);
784                     if (prec == -1 && PyErr_Occurred())
785                         goto error;
786                     if (prec < 0)
787                         prec = 0;
788                     if (--fmtcnt >= 0)
789                         c = *fmt++;
790                 }
791                 else if (c >= 0 && isdigit(c)) {
792                     prec = c - '0';
793                     while (--fmtcnt >= 0) {
794                         c = Py_CHARMASK(*fmt++);
795                         if (!isdigit(c))
796                             break;
797                         if (prec > (INT_MAX - ((int)c - '0')) / 10) {
798                             PyErr_SetString(
799                                 PyExc_ValueError,
800                                 "prec too big");
801                             goto error;
802                         }
803                         prec = prec*10 + (c - '0');
804                     }
805                 }
806             } /* prec */
807             if (fmtcnt >= 0) {
808                 if (c == 'h' || c == 'l' || c == 'L') {
809                     if (--fmtcnt >= 0)
810                         c = *fmt++;
811                 }
812             }
813             if (fmtcnt < 0) {
814                 PyErr_SetString(PyExc_ValueError,
815                                 "incomplete format");
816                 goto error;
817             }
818             v = getnextarg(args, arglen, &argidx);
819             if (v == NULL)
820                 goto error;
821 
822             if (fmtcnt < 0) {
823                 /* last writer: disable writer overallocation */
824                 writer.overallocate = 0;
825             }
826 
827             sign = 0;
828             fill = ' ';
829             switch (c) {
830             case 'r':
831                 // %r is only for 2/3 code; 3 only code should use %a
832             case 'a':
833                 temp = PyObject_ASCII(v);
834                 if (temp == NULL)
835                     goto error;
836                 assert(PyUnicode_IS_ASCII(temp));
837                 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
838                 len = PyUnicode_GET_LENGTH(temp);
839                 if (prec >= 0 && len > prec)
840                     len = prec;
841                 break;
842 
843             case 's':
844                 // %s is only for 2/3 code; 3 only code should use %b
845             case 'b':
846                 temp = format_obj(v, &pbuf, &len);
847                 if (temp == NULL)
848                     goto error;
849                 if (prec >= 0 && len > prec)
850                     len = prec;
851                 break;
852 
853             case 'i':
854             case 'd':
855             case 'u':
856             case 'o':
857             case 'x':
858             case 'X':
859                 if (PyLong_CheckExact(v)
860                     && width == -1 && prec == -1
861                     && !(flags & (F_SIGN | F_BLANK))
862                     && c != 'X')
863                 {
864                     /* Fast path */
865                     int alternate = flags & F_ALT;
866                     int base;
867 
868                     switch(c)
869                     {
870                         default:
871                             Py_UNREACHABLE();
872                         case 'd':
873                         case 'i':
874                         case 'u':
875                             base = 10;
876                             break;
877                         case 'o':
878                             base = 8;
879                             break;
880                         case 'x':
881                         case 'X':
882                             base = 16;
883                             break;
884                     }
885 
886                     /* Fast path */
887                     writer.min_size -= 2; /* size preallocated for "%d" */
888                     res = _PyLong_FormatBytesWriter(&writer, res,
889                                                     v, base, alternate);
890                     if (res == NULL)
891                         goto error;
892                     continue;
893                 }
894 
895                 temp = formatlong(v, flags, prec, c);
896                 if (!temp)
897                     goto error;
898                 assert(PyUnicode_IS_ASCII(temp));
899                 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
900                 len = PyUnicode_GET_LENGTH(temp);
901                 sign = 1;
902                 if (flags & F_ZERO)
903                     fill = '0';
904                 break;
905 
906             case 'e':
907             case 'E':
908             case 'f':
909             case 'F':
910             case 'g':
911             case 'G':
912                 if (width == -1 && prec == -1
913                     && !(flags & (F_SIGN | F_BLANK)))
914                 {
915                     /* Fast path */
916                     writer.min_size -= 2; /* size preallocated for "%f" */
917                     res = formatfloat(v, flags, prec, c, NULL, &writer, res);
918                     if (res == NULL)
919                         goto error;
920                     continue;
921                 }
922 
923                 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
924                     goto error;
925                 pbuf = PyBytes_AS_STRING(temp);
926                 len = PyBytes_GET_SIZE(temp);
927                 sign = 1;
928                 if (flags & F_ZERO)
929                     fill = '0';
930                 break;
931 
932             case 'c':
933                 pbuf = &onechar;
934                 len = byte_converter(v, &onechar);
935                 if (!len)
936                     goto error;
937                 if (width == -1) {
938                     /* Fast path */
939                     *res++ = onechar;
940                     continue;
941                 }
942                 break;
943 
944             default:
945                 PyErr_Format(PyExc_ValueError,
946                   "unsupported format character '%c' (0x%x) "
947                   "at index %zd",
948                   c, c,
949                   (Py_ssize_t)(fmt - 1 - format));
950                 goto error;
951             }
952 
953             if (sign) {
954                 if (*pbuf == '-' || *pbuf == '+') {
955                     sign = *pbuf++;
956                     len--;
957                 }
958                 else if (flags & F_SIGN)
959                     sign = '+';
960                 else if (flags & F_BLANK)
961                     sign = ' ';
962                 else
963                     sign = 0;
964             }
965             if (width < len)
966                 width = len;
967 
968             alloc = width;
969             if (sign != 0 && len == width)
970                 alloc++;
971             /* 2: size preallocated for %s */
972             if (alloc > 2) {
973                 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
974                 if (res == NULL)
975                     goto error;
976             }
977 #ifdef Py_DEBUG
978             before = res;
979 #endif
980 
981             /* Write the sign if needed */
982             if (sign) {
983                 if (fill != ' ')
984                     *res++ = sign;
985                 if (width > len)
986                     width--;
987             }
988 
989             /* Write the numeric prefix for "x", "X" and "o" formats
990                if the alternate form is used.
991                For example, write "0x" for the "%#x" format. */
992             if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
993                 assert(pbuf[0] == '0');
994                 assert(pbuf[1] == c);
995                 if (fill != ' ') {
996                     *res++ = *pbuf++;
997                     *res++ = *pbuf++;
998                 }
999                 width -= 2;
1000                 if (width < 0)
1001                     width = 0;
1002                 len -= 2;
1003             }
1004 
1005             /* Pad left with the fill character if needed */
1006             if (width > len && !(flags & F_LJUST)) {
1007                 memset(res, fill, width - len);
1008                 res += (width - len);
1009                 width = len;
1010             }
1011 
1012             /* If padding with spaces: write sign if needed and/or numeric
1013                prefix if the alternate form is used */
1014             if (fill == ' ') {
1015                 if (sign)
1016                     *res++ = sign;
1017                 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1018                     assert(pbuf[0] == '0');
1019                     assert(pbuf[1] == c);
1020                     *res++ = *pbuf++;
1021                     *res++ = *pbuf++;
1022                 }
1023             }
1024 
1025             /* Copy bytes */
1026             memcpy(res, pbuf, len);
1027             res += len;
1028 
1029             /* Pad right with the fill character if needed */
1030             if (width > len) {
1031                 memset(res, ' ', width - len);
1032                 res += (width - len);
1033             }
1034 
1035             if (dict && (argidx < arglen)) {
1036                 PyErr_SetString(PyExc_TypeError,
1037                            "not all arguments converted during bytes formatting");
1038                 Py_XDECREF(temp);
1039                 goto error;
1040             }
1041             Py_XDECREF(temp);
1042 
1043 #ifdef Py_DEBUG
1044             /* check that we computed the exact size for this write */
1045             assert((res - before) == alloc);
1046 #endif
1047         } /* '%' */
1048 
1049         /* If overallocation was disabled, ensure that it was the last
1050            write. Otherwise, we missed an optimization */
1051         assert(writer.overallocate || fmtcnt < 0 || use_bytearray);
1052     } /* until end */
1053 
1054     if (argidx < arglen && !dict) {
1055         PyErr_SetString(PyExc_TypeError,
1056                         "not all arguments converted during bytes formatting");
1057         goto error;
1058     }
1059 
1060     if (args_owned) {
1061         Py_DECREF(args);
1062     }
1063     return _PyBytesWriter_Finish(&writer, res);
1064 
1065  error:
1066     _PyBytesWriter_Dealloc(&writer);
1067     if (args_owned) {
1068         Py_DECREF(args);
1069     }
1070     return NULL;
1071 }
1072 
1073 /* =-= */
1074 
1075 static void
bytes_dealloc(PyObject * op)1076 bytes_dealloc(PyObject *op)
1077 {
1078     Py_TYPE(op)->tp_free(op);
1079 }
1080 
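/* Unescape a backslash-escaped string. If unicode is non-zero,
   the string is a u-literal (the decoder below accepts this flag but
   does not consult it). If recode_encoding is non-zero, the string is
   UTF-8 encoded and its non-ASCII bytes should be re-encoded in the
   specified encoding.  This describes _PyBytes_DecodeEscape();
   _PyBytes_DecodeEscapeRecode() is the helper that does the recoding.  */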
1085 
1086 static char *
_PyBytes_DecodeEscapeRecode(const char ** s,const char * end,const char * errors,const char * recode_encoding,_PyBytesWriter * writer,char * p)1087 _PyBytes_DecodeEscapeRecode(const char **s, const char *end,
1088                             const char *errors, const char *recode_encoding,
1089                             _PyBytesWriter *writer, char *p)
1090 {
1091     PyObject *u, *w;
1092     const char* t;
1093 
1094     t = *s;
1095     /* Decode non-ASCII bytes as UTF-8. */
1096     while (t < end && (*t & 0x80))
1097         t++;
1098     u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
1099     if (u == NULL)
1100         return NULL;
1101 
1102     /* Recode them in target encoding. */
1103     w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
1104     Py_DECREF(u);
1105     if  (w == NULL)
1106         return NULL;
1107     assert(PyBytes_Check(w));
1108 
1109     /* Append bytes to output buffer. */
1110     writer->min_size--;   /* subtract 1 preallocated byte */
1111     p = _PyBytesWriter_WriteBytes(writer, p,
1112                                   PyBytes_AS_STRING(w),
1113                                   PyBytes_GET_SIZE(w));
1114     Py_DECREF(w);
1115     if (p == NULL)
1116         return NULL;
1117 
1118     *s = t;
1119     return p;
1120 }
1121 
_PyBytes_DecodeEscape(const char * s,Py_ssize_t len,const char * errors,Py_ssize_t unicode,const char * recode_encoding,const char ** first_invalid_escape)1122 PyObject *_PyBytes_DecodeEscape(const char *s,
1123                                 Py_ssize_t len,
1124                                 const char *errors,
1125                                 Py_ssize_t unicode,
1126                                 const char *recode_encoding,
1127                                 const char **first_invalid_escape)
1128 {
1129     int c;
1130     char *p;
1131     const char *end;
1132     _PyBytesWriter writer;
1133 
1134     _PyBytesWriter_Init(&writer);
1135 
1136     p = _PyBytesWriter_Alloc(&writer, len);
1137     if (p == NULL)
1138         return NULL;
1139     writer.overallocate = 1;
1140 
1141     *first_invalid_escape = NULL;
1142 
1143     end = s + len;
1144     while (s < end) {
1145         if (*s != '\\') {
1146           non_esc:
1147             if (!(recode_encoding && (*s & 0x80))) {
1148                 *p++ = *s++;
1149             }
1150             else {
1151                 /* non-ASCII character and need to recode */
1152                 p = _PyBytes_DecodeEscapeRecode(&s, end,
1153                                                 errors, recode_encoding,
1154                                                 &writer, p);
1155                 if (p == NULL)
1156                     goto failed;
1157             }
1158             continue;
1159         }
1160 
1161         s++;
1162         if (s == end) {
1163             PyErr_SetString(PyExc_ValueError,
1164                             "Trailing \\ in string");
1165             goto failed;
1166         }
1167 
1168         switch (*s++) {
1169         /* XXX This assumes ASCII! */
1170         case '\n': break;
1171         case '\\': *p++ = '\\'; break;
1172         case '\'': *p++ = '\''; break;
1173         case '\"': *p++ = '\"'; break;
1174         case 'b': *p++ = '\b'; break;
1175         case 'f': *p++ = '\014'; break; /* FF */
1176         case 't': *p++ = '\t'; break;
1177         case 'n': *p++ = '\n'; break;
1178         case 'r': *p++ = '\r'; break;
1179         case 'v': *p++ = '\013'; break; /* VT */
1180         case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1181         case '0': case '1': case '2': case '3':
1182         case '4': case '5': case '6': case '7':
1183             c = s[-1] - '0';
1184             if (s < end && '0' <= *s && *s <= '7') {
1185                 c = (c<<3) + *s++ - '0';
1186                 if (s < end && '0' <= *s && *s <= '7')
1187                     c = (c<<3) + *s++ - '0';
1188             }
1189             *p++ = c;
1190             break;
1191         case 'x':
1192             if (s+1 < end) {
1193                 int digit1, digit2;
1194                 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1195                 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1196                 if (digit1 < 16 && digit2 < 16) {
1197                     *p++ = (unsigned char)((digit1 << 4) + digit2);
1198                     s += 2;
1199                     break;
1200                 }
1201             }
1202             /* invalid hexadecimal digits */
1203 
1204             if (!errors || strcmp(errors, "strict") == 0) {
1205                 PyErr_Format(PyExc_ValueError,
1206                              "invalid \\x escape at position %d",
1207                              s - 2 - (end - len));
1208                 goto failed;
1209             }
1210             if (strcmp(errors, "replace") == 0) {
1211                 *p++ = '?';
1212             } else if (strcmp(errors, "ignore") == 0)
1213                 /* do nothing */;
1214             else {
1215                 PyErr_Format(PyExc_ValueError,
1216                              "decoding error; unknown "
1217                              "error handling code: %.400s",
1218                              errors);
1219                 goto failed;
1220             }
1221             /* skip \x */
1222             if (s < end && Py_ISXDIGIT(s[0]))
1223                 s++; /* and a hexdigit */
1224             break;
1225 
1226         default:
1227             if (*first_invalid_escape == NULL) {
1228                 *first_invalid_escape = s-1; /* Back up one char, since we've
1229                                                 already incremented s. */
1230             }
1231             *p++ = '\\';
1232             s--;
1233             goto non_esc; /* an arbitrary number of unescaped
1234                              UTF-8 bytes may follow. */
1235         }
1236     }
1237 
1238     return _PyBytesWriter_Finish(&writer, p);
1239 
1240   failed:
1241     _PyBytesWriter_Dealloc(&writer);
1242     return NULL;
1243 }
1244 
PyBytes_DecodeEscape(const char * s,Py_ssize_t len,const char * errors,Py_ssize_t unicode,const char * recode_encoding)1245 PyObject *PyBytes_DecodeEscape(const char *s,
1246                                 Py_ssize_t len,
1247                                 const char *errors,
1248                                 Py_ssize_t unicode,
1249                                 const char *recode_encoding)
1250 {
1251     const char* first_invalid_escape;
1252     PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
1253                                              recode_encoding,
1254                                              &first_invalid_escape);
1255     if (result == NULL)
1256         return NULL;
1257     if (first_invalid_escape != NULL) {
1258         if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1259                              "invalid escape sequence '\\%c'",
1260                              (unsigned char)*first_invalid_escape) < 0) {
1261             Py_DECREF(result);
1262             return NULL;
1263         }
1264     }
1265     return result;
1266 
1267 }
1268 /* -------------------------------------------------------------------- */
1269 /* object api */
1270 
1271 Py_ssize_t
PyBytes_Size(PyObject * op)1272 PyBytes_Size(PyObject *op)
1273 {
1274     if (!PyBytes_Check(op)) {
1275         PyErr_Format(PyExc_TypeError,
1276              "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1277         return -1;
1278     }
1279     return Py_SIZE(op);
1280 }
1281 
1282 char *
PyBytes_AsString(PyObject * op)1283 PyBytes_AsString(PyObject *op)
1284 {
1285     if (!PyBytes_Check(op)) {
1286         PyErr_Format(PyExc_TypeError,
1287              "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1288         return NULL;
1289     }
1290     return ((PyBytesObject *)op)->ob_sval;
1291 }
1292 
1293 int
PyBytes_AsStringAndSize(PyObject * obj,char ** s,Py_ssize_t * len)1294 PyBytes_AsStringAndSize(PyObject *obj,
1295                          char **s,
1296                          Py_ssize_t *len)
1297 {
1298     if (s == NULL) {
1299         PyErr_BadInternalCall();
1300         return -1;
1301     }
1302 
1303     if (!PyBytes_Check(obj)) {
1304         PyErr_Format(PyExc_TypeError,
1305              "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1306         return -1;
1307     }
1308 
1309     *s = PyBytes_AS_STRING(obj);
1310     if (len != NULL)
1311         *len = PyBytes_GET_SIZE(obj);
1312     else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1313         PyErr_SetString(PyExc_ValueError,
1314                         "embedded null byte");
1315         return -1;
1316     }
1317     return 0;
1318 }
1319 
1320 /* -------------------------------------------------------------------- */
1321 /* Methods */
1322 
1323 #include "stringlib/stringdefs.h"
1324 
1325 #include "stringlib/fastsearch.h"
1326 #include "stringlib/count.h"
1327 #include "stringlib/find.h"
1328 #include "stringlib/join.h"
1329 #include "stringlib/partition.h"
1330 #include "stringlib/split.h"
1331 #include "stringlib/ctype.h"
1332 
1333 #include "stringlib/transmogrify.h"
1334 
1335 PyObject *
PyBytes_Repr(PyObject * obj,int smartquotes)1336 PyBytes_Repr(PyObject *obj, int smartquotes)
1337 {
1338     PyBytesObject* op = (PyBytesObject*) obj;
1339     Py_ssize_t i, length = Py_SIZE(op);
1340     Py_ssize_t newsize, squotes, dquotes;
1341     PyObject *v;
1342     unsigned char quote, *s, *p;
1343 
1344     /* Compute size of output string */
1345     squotes = dquotes = 0;
1346     newsize = 3; /* b'' */
1347     s = (unsigned char*)op->ob_sval;
1348     for (i = 0; i < length; i++) {
1349         Py_ssize_t incr = 1;
1350         switch(s[i]) {
1351         case '\'': squotes++; break;
1352         case '"':  dquotes++; break;
1353         case '\\': case '\t': case '\n': case '\r':
1354             incr = 2; break; /* \C */
1355         default:
1356             if (s[i] < ' ' || s[i] >= 0x7f)
1357                 incr = 4; /* \xHH */
1358         }
1359         if (newsize > PY_SSIZE_T_MAX - incr)
1360             goto overflow;
1361         newsize += incr;
1362     }
1363     quote = '\'';
1364     if (smartquotes && squotes && !dquotes)
1365         quote = '"';
1366     if (squotes && quote == '\'') {
1367         if (newsize > PY_SSIZE_T_MAX - squotes)
1368             goto overflow;
1369         newsize += squotes;
1370     }
1371 
1372     v = PyUnicode_New(newsize, 127);
1373     if (v == NULL) {
1374         return NULL;
1375     }
1376     p = PyUnicode_1BYTE_DATA(v);
1377 
1378     *p++ = 'b', *p++ = quote;
1379     for (i = 0; i < length; i++) {
1380         unsigned char c = op->ob_sval[i];
1381         if (c == quote || c == '\\')
1382             *p++ = '\\', *p++ = c;
1383         else if (c == '\t')
1384             *p++ = '\\', *p++ = 't';
1385         else if (c == '\n')
1386             *p++ = '\\', *p++ = 'n';
1387         else if (c == '\r')
1388             *p++ = '\\', *p++ = 'r';
1389         else if (c < ' ' || c >= 0x7f) {
1390             *p++ = '\\';
1391             *p++ = 'x';
1392             *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1393             *p++ = Py_hexdigits[c & 0xf];
1394         }
1395         else
1396             *p++ = c;
1397     }
1398     *p++ = quote;
1399     assert(_PyUnicode_CheckConsistency(v, 1));
1400     return v;
1401 
1402   overflow:
1403     PyErr_SetString(PyExc_OverflowError,
1404                     "bytes object is too large to make repr");
1405     return NULL;
1406 }
1407 
1408 static PyObject *
bytes_repr(PyObject * op)1409 bytes_repr(PyObject *op)
1410 {
1411     return PyBytes_Repr(op, 1);
1412 }
1413 
1414 static PyObject *
bytes_str(PyObject * op)1415 bytes_str(PyObject *op)
1416 {
1417     if (Py_BytesWarningFlag) {
1418         if (PyErr_WarnEx(PyExc_BytesWarning,
1419                          "str() on a bytes instance", 1))
1420             return NULL;
1421     }
1422     return bytes_repr(op);
1423 }
1424 
1425 static Py_ssize_t
bytes_length(PyBytesObject * a)1426 bytes_length(PyBytesObject *a)
1427 {
1428     return Py_SIZE(a);
1429 }
1430 
1431 /* This is also used by PyBytes_Concat() */
1432 static PyObject *
bytes_concat(PyObject * a,PyObject * b)1433 bytes_concat(PyObject *a, PyObject *b)
1434 {
1435     Py_buffer va, vb;
1436     PyObject *result = NULL;
1437 
1438     va.len = -1;
1439     vb.len = -1;
1440     if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1441         PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
1442         PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1443                      Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
1444         goto done;
1445     }
1446 
1447     /* Optimize end cases */
1448     if (va.len == 0 && PyBytes_CheckExact(b)) {
1449         result = b;
1450         Py_INCREF(result);
1451         goto done;
1452     }
1453     if (vb.len == 0 && PyBytes_CheckExact(a)) {
1454         result = a;
1455         Py_INCREF(result);
1456         goto done;
1457     }
1458 
1459     if (va.len > PY_SSIZE_T_MAX - vb.len) {
1460         PyErr_NoMemory();
1461         goto done;
1462     }
1463 
1464     result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
1465     if (result != NULL) {
1466         memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1467         memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1468     }
1469 
1470   done:
1471     if (va.len != -1)
1472         PyBuffer_Release(&va);
1473     if (vb.len != -1)
1474         PyBuffer_Release(&vb);
1475     return result;
1476 }
1477 
1478 static PyObject *
bytes_repeat(PyBytesObject * a,Py_ssize_t n)1479 bytes_repeat(PyBytesObject *a, Py_ssize_t n)
1480 {
1481     Py_ssize_t i;
1482     Py_ssize_t j;
1483     Py_ssize_t size;
1484     PyBytesObject *op;
1485     size_t nbytes;
1486     if (n < 0)
1487         n = 0;
1488     /* watch out for overflows:  the size can overflow int,
1489      * and the # of bytes needed can overflow size_t
1490      */
1491     if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1492         PyErr_SetString(PyExc_OverflowError,
1493             "repeated bytes are too long");
1494         return NULL;
1495     }
1496     size = Py_SIZE(a) * n;
1497     if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1498         Py_INCREF(a);
1499         return (PyObject *)a;
1500     }
1501     nbytes = (size_t)size;
1502     if (nbytes + PyBytesObject_SIZE <= nbytes) {
1503         PyErr_SetString(PyExc_OverflowError,
1504             "repeated bytes are too long");
1505         return NULL;
1506     }
1507     op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1508     if (op == NULL)
1509         return PyErr_NoMemory();
1510     (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
1511     op->ob_shash = -1;
1512     op->ob_sval[size] = '\0';
1513     if (Py_SIZE(a) == 1 && n > 0) {
1514         memset(op->ob_sval, a->ob_sval[0] , n);
1515         return (PyObject *) op;
1516     }
1517     i = 0;
1518     if (i < size) {
1519         memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
1520         i = Py_SIZE(a);
1521     }
1522     while (i < size) {
1523         j = (i <= size-i)  ?  i  :  size-i;
1524         memcpy(op->ob_sval+i, op->ob_sval, j);
1525         i += j;
1526     }
1527     return (PyObject *) op;
1528 }
1529 
1530 static int
bytes_contains(PyObject * self,PyObject * arg)1531 bytes_contains(PyObject *self, PyObject *arg)
1532 {
1533     return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1534 }
1535 
1536 static PyObject *
bytes_item(PyBytesObject * a,Py_ssize_t i)1537 bytes_item(PyBytesObject *a, Py_ssize_t i)
1538 {
1539     if (i < 0 || i >= Py_SIZE(a)) {
1540         PyErr_SetString(PyExc_IndexError, "index out of range");
1541         return NULL;
1542     }
1543     return PyLong_FromLong((unsigned char)a->ob_sval[i]);
1544 }
1545 
1546 static int
bytes_compare_eq(PyBytesObject * a,PyBytesObject * b)1547 bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1548 {
1549     int cmp;
1550     Py_ssize_t len;
1551 
1552     len = Py_SIZE(a);
1553     if (Py_SIZE(b) != len)
1554         return 0;
1555 
1556     if (a->ob_sval[0] != b->ob_sval[0])
1557         return 0;
1558 
1559     cmp = memcmp(a->ob_sval, b->ob_sval, len);
1560     return (cmp == 0);
1561 }
1562 
1563 static PyObject*
bytes_richcompare(PyBytesObject * a,PyBytesObject * b,int op)1564 bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
1565 {
1566     int c;
1567     Py_ssize_t len_a, len_b;
1568     Py_ssize_t min_len;
1569     PyObject *result;
1570     int rc;
1571 
1572     /* Make sure both arguments are strings. */
1573     if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
1574         if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
1575             rc = PyObject_IsInstance((PyObject*)a,
1576                                      (PyObject*)&PyUnicode_Type);
1577             if (!rc)
1578                 rc = PyObject_IsInstance((PyObject*)b,
1579                                          (PyObject*)&PyUnicode_Type);
1580             if (rc < 0)
1581                 return NULL;
1582             if (rc) {
1583                 if (PyErr_WarnEx(PyExc_BytesWarning,
1584                                  "Comparison between bytes and string", 1))
1585                     return NULL;
1586             }
1587             else {
1588                 rc = PyObject_IsInstance((PyObject*)a,
1589                                          (PyObject*)&PyLong_Type);
1590                 if (!rc)
1591                     rc = PyObject_IsInstance((PyObject*)b,
1592                                              (PyObject*)&PyLong_Type);
1593                 if (rc < 0)
1594                     return NULL;
1595                 if (rc) {
1596                     if (PyErr_WarnEx(PyExc_BytesWarning,
1597                                      "Comparison between bytes and int", 1))
1598                         return NULL;
1599                 }
1600             }
1601         }
1602         result = Py_NotImplemented;
1603     }
1604     else if (a == b) {
1605         switch (op) {
1606         case Py_EQ:
1607         case Py_LE:
1608         case Py_GE:
1609             /* a string is equal to itself */
1610             result = Py_True;
1611             break;
1612         case Py_NE:
1613         case Py_LT:
1614         case Py_GT:
1615             result = Py_False;
1616             break;
1617         default:
1618             PyErr_BadArgument();
1619             return NULL;
1620         }
1621     }
1622     else if (op == Py_EQ || op == Py_NE) {
1623         int eq = bytes_compare_eq(a, b);
1624         eq ^= (op == Py_NE);
1625         result = eq ? Py_True : Py_False;
1626     }
1627     else {
1628         len_a = Py_SIZE(a);
1629         len_b = Py_SIZE(b);
1630         min_len = Py_MIN(len_a, len_b);
1631         if (min_len > 0) {
1632             c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1633             if (c == 0)
1634                 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1635         }
1636         else
1637             c = 0;
1638         if (c == 0)
1639             c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1640         switch (op) {
1641         case Py_LT: c = c <  0; break;
1642         case Py_LE: c = c <= 0; break;
1643         case Py_GT: c = c >  0; break;
1644         case Py_GE: c = c >= 0; break;
1645         default:
1646             PyErr_BadArgument();
1647             return NULL;
1648         }
1649         result = c ? Py_True : Py_False;
1650     }
1651 
1652     Py_INCREF(result);
1653     return result;
1654 }
1655 
1656 static Py_hash_t
bytes_hash(PyBytesObject * a)1657 bytes_hash(PyBytesObject *a)
1658 {
1659     if (a->ob_shash == -1) {
1660         /* Can't fail */
1661         a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
1662     }
1663     return a->ob_shash;
1664 }
1665 
1666 static PyObject*
bytes_subscript(PyBytesObject * self,PyObject * item)1667 bytes_subscript(PyBytesObject* self, PyObject* item)
1668 {
1669     if (PyIndex_Check(item)) {
1670         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1671         if (i == -1 && PyErr_Occurred())
1672             return NULL;
1673         if (i < 0)
1674             i += PyBytes_GET_SIZE(self);
1675         if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1676             PyErr_SetString(PyExc_IndexError,
1677                             "index out of range");
1678             return NULL;
1679         }
1680         return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1681     }
1682     else if (PySlice_Check(item)) {
1683         Py_ssize_t start, stop, step, slicelength, cur, i;
1684         char* source_buf;
1685         char* result_buf;
1686         PyObject* result;
1687 
1688         if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1689             return NULL;
1690         }
1691         slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1692                                             &stop, step);
1693 
1694         if (slicelength <= 0) {
1695             return PyBytes_FromStringAndSize("", 0);
1696         }
1697         else if (start == 0 && step == 1 &&
1698                  slicelength == PyBytes_GET_SIZE(self) &&
1699                  PyBytes_CheckExact(self)) {
1700             Py_INCREF(self);
1701             return (PyObject *)self;
1702         }
1703         else if (step == 1) {
1704             return PyBytes_FromStringAndSize(
1705                 PyBytes_AS_STRING(self) + start,
1706                 slicelength);
1707         }
1708         else {
1709             source_buf = PyBytes_AS_STRING(self);
1710             result = PyBytes_FromStringAndSize(NULL, slicelength);
1711             if (result == NULL)
1712                 return NULL;
1713 
1714             result_buf = PyBytes_AS_STRING(result);
1715             for (cur = start, i = 0; i < slicelength;
1716                  cur += step, i++) {
1717                 result_buf[i] = source_buf[cur];
1718             }
1719 
1720             return result;
1721         }
1722     }
1723     else {
1724         PyErr_Format(PyExc_TypeError,
1725                      "byte indices must be integers or slices, not %.200s",
1726                      Py_TYPE(item)->tp_name);
1727         return NULL;
1728     }
1729 }
1730 
1731 static int
bytes_buffer_getbuffer(PyBytesObject * self,Py_buffer * view,int flags)1732 bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
1733 {
1734     return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1735                              1, flags);
1736 }
1737 
1738 static PySequenceMethods bytes_as_sequence = {
1739     (lenfunc)bytes_length, /*sq_length*/
1740     (binaryfunc)bytes_concat, /*sq_concat*/
1741     (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1742     (ssizeargfunc)bytes_item, /*sq_item*/
1743     0,                  /*sq_slice*/
1744     0,                  /*sq_ass_item*/
1745     0,                  /*sq_ass_slice*/
1746     (objobjproc)bytes_contains /*sq_contains*/
1747 };
1748 
1749 static PyMappingMethods bytes_as_mapping = {
1750     (lenfunc)bytes_length,
1751     (binaryfunc)bytes_subscript,
1752     0,
1753 };
1754 
1755 static PyBufferProcs bytes_as_buffer = {
1756     (getbufferproc)bytes_buffer_getbuffer,
1757     NULL,
1758 };
1759 
1760 
/* Mode selectors, presumably naming which end(s) of a bytes object a
   strip operation trims.  NOTE(review): the code that consumes these is
   outside this excerpt; the meaning is inferred from the names only --
   confirm against the strip helpers. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
1764 
1765 /*[clinic input]
1766 bytes.split
1767 
1768     sep: object = None
1769         The delimiter according which to split the bytes.
1770         None (the default value) means split on ASCII whitespace characters
1771         (space, tab, return, newline, formfeed, vertical tab).
1772     maxsplit: Py_ssize_t = -1
1773         Maximum number of splits to do.
1774         -1 (the default value) means no limit.
1775 
1776 Return a list of the sections in the bytes, using sep as the delimiter.
1777 [clinic start generated code]*/
1778 
1779 static PyObject *
bytes_split_impl(PyBytesObject * self,PyObject * sep,Py_ssize_t maxsplit)1780 bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1781 /*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
1782 {
1783     Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1784     const char *s = PyBytes_AS_STRING(self), *sub;
1785     Py_buffer vsub;
1786     PyObject *list;
1787 
1788     if (maxsplit < 0)
1789         maxsplit = PY_SSIZE_T_MAX;
1790     if (sep == Py_None)
1791         return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1792     if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1793         return NULL;
1794     sub = vsub.buf;
1795     n = vsub.len;
1796 
1797     list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1798     PyBuffer_Release(&vsub);
1799     return list;
1800 }
1801 
1802 /*[clinic input]
1803 bytes.partition
1804 
1805     sep: Py_buffer
1806     /
1807 
1808 Partition the bytes into three parts using the given separator.
1809 
1810 This will search for the separator sep in the bytes. If the separator is found,
1811 returns a 3-tuple containing the part before the separator, the separator
1812 itself, and the part after it.
1813 
1814 If the separator is not found, returns a 3-tuple containing the original bytes
1815 object and two empty bytes objects.
1816 [clinic start generated code]*/
1817 
1818 static PyObject *
bytes_partition_impl(PyBytesObject * self,Py_buffer * sep)1819 bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1820 /*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
1821 {
1822     return stringlib_partition(
1823         (PyObject*) self,
1824         PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1825         sep->obj, (const char *)sep->buf, sep->len
1826         );
1827 }
1828 
1829 /*[clinic input]
1830 bytes.rpartition
1831 
1832     sep: Py_buffer
1833     /
1834 
1835 Partition the bytes into three parts using the given separator.
1836 
1837 This will search for the separator sep in the bytes, starting at the end. If
1838 the separator is found, returns a 3-tuple containing the part before the
1839 separator, the separator itself, and the part after it.
1840 
1841 If the separator is not found, returns a 3-tuple containing two empty bytes
1842 objects and the original bytes object.
1843 [clinic start generated code]*/
1844 
1845 static PyObject *
bytes_rpartition_impl(PyBytesObject * self,Py_buffer * sep)1846 bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1847 /*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
1848 {
1849     return stringlib_rpartition(
1850         (PyObject*) self,
1851         PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1852         sep->obj, (const char *)sep->buf, sep->len
1853         );
1854 }
1855 
1856 /*[clinic input]
1857 bytes.rsplit = bytes.split
1858 
1859 Return a list of the sections in the bytes, using sep as the delimiter.
1860 
1861 Splitting is done starting at the end of the bytes and working to the front.
1862 [clinic start generated code]*/
1863 
1864 static PyObject *
bytes_rsplit_impl(PyBytesObject * self,PyObject * sep,Py_ssize_t maxsplit)1865 bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1866 /*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
1867 {
1868     Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1869     const char *s = PyBytes_AS_STRING(self), *sub;
1870     Py_buffer vsub;
1871     PyObject *list;
1872 
1873     if (maxsplit < 0)
1874         maxsplit = PY_SSIZE_T_MAX;
1875     if (sep == Py_None)
1876         return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1877     if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1878         return NULL;
1879     sub = vsub.buf;
1880     n = vsub.len;
1881 
1882     list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1883     PyBuffer_Release(&vsub);
1884     return list;
1885 }
1886 
1887 
1888 /*[clinic input]
1889 bytes.join
1890 
1891     iterable_of_bytes: object
1892     /
1893 
1894 Concatenate any number of bytes objects.
1895 
1896 The bytes whose method is called is inserted in between each pair.
1897 
1898 The result is returned as a new bytes object.
1899 
1900 Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1901 [clinic start generated code]*/
1902 
1903 static PyObject *
bytes_join(PyBytesObject * self,PyObject * iterable_of_bytes)1904 bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1905 /*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
1906 {
1907     return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
1908 }
1909 
1910 PyObject *
_PyBytes_Join(PyObject * sep,PyObject * x)1911 _PyBytes_Join(PyObject *sep, PyObject *x)
1912 {
1913     assert(sep != NULL && PyBytes_Check(sep));
1914     assert(x != NULL);
1915     return bytes_join((PyBytesObject*)sep, x);
1916 }
1917 
1918 static PyObject *
bytes_find(PyBytesObject * self,PyObject * args)1919 bytes_find(PyBytesObject *self, PyObject *args)
1920 {
1921     return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1922 }
1923 
1924 static PyObject *
bytes_index(PyBytesObject * self,PyObject * args)1925 bytes_index(PyBytesObject *self, PyObject *args)
1926 {
1927     return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1928 }
1929 
1930 
1931 static PyObject *
bytes_rfind(PyBytesObject * self,PyObject * args)1932 bytes_rfind(PyBytesObject *self, PyObject *args)
1933 {
1934     return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1935 }
1936 
1937 
1938 static PyObject *
bytes_rindex(PyBytesObject * self,PyObject * args)1939 bytes_rindex(PyBytesObject *self, PyObject *args)
1940 {
1941     return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1942 }
1943 
1944 
1945 Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyBytesObject * self,int striptype,PyObject * sepobj)1946 do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
1947 {
1948     Py_buffer vsep;
1949     char *s = PyBytes_AS_STRING(self);
1950     Py_ssize_t len = PyBytes_GET_SIZE(self);
1951     char *sep;
1952     Py_ssize_t seplen;
1953     Py_ssize_t i, j;
1954 
1955     if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
1956         return NULL;
1957     sep = vsep.buf;
1958     seplen = vsep.len;
1959 
1960     i = 0;
1961     if (striptype != RIGHTSTRIP) {
1962         while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1963             i++;
1964         }
1965     }
1966 
1967     j = len;
1968     if (striptype != LEFTSTRIP) {
1969         do {
1970             j--;
1971         } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1972         j++;
1973     }
1974 
1975     PyBuffer_Release(&vsep);
1976 
1977     if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1978         Py_INCREF(self);
1979         return (PyObject*)self;
1980     }
1981     else
1982         return PyBytes_FromStringAndSize(s+i, j-i);
1983 }
1984 
1985 
1986 Py_LOCAL_INLINE(PyObject *)
do_strip(PyBytesObject * self,int striptype)1987 do_strip(PyBytesObject *self, int striptype)
1988 {
1989     char *s = PyBytes_AS_STRING(self);
1990     Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
1991 
1992     i = 0;
1993     if (striptype != RIGHTSTRIP) {
1994         while (i < len && Py_ISSPACE(s[i])) {
1995             i++;
1996         }
1997     }
1998 
1999     j = len;
2000     if (striptype != LEFTSTRIP) {
2001         do {
2002             j--;
2003         } while (j >= i && Py_ISSPACE(s[j]));
2004         j++;
2005     }
2006 
2007     if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2008         Py_INCREF(self);
2009         return (PyObject*)self;
2010     }
2011     else
2012         return PyBytes_FromStringAndSize(s+i, j-i);
2013 }
2014 
2015 
2016 Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyBytesObject * self,int striptype,PyObject * bytes)2017 do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
2018 {
2019     if (bytes != NULL && bytes != Py_None) {
2020         return do_xstrip(self, striptype, bytes);
2021     }
2022     return do_strip(self, striptype);
2023 }
2024 
2025 /*[clinic input]
2026 bytes.strip
2027 
2028     bytes: object = None
2029     /
2030 
2031 Strip leading and trailing bytes contained in the argument.
2032 
2033 If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2034 [clinic start generated code]*/
2035 
2036 static PyObject *
bytes_strip_impl(PyBytesObject * self,PyObject * bytes)2037 bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
2038 /*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
2039 {
2040     return do_argstrip(self, BOTHSTRIP, bytes);
2041 }
2042 
2043 /*[clinic input]
2044 bytes.lstrip
2045 
2046     bytes: object = None
2047     /
2048 
2049 Strip leading bytes contained in the argument.
2050 
2051 If the argument is omitted or None, strip leading  ASCII whitespace.
2052 [clinic start generated code]*/
2053 
2054 static PyObject *
bytes_lstrip_impl(PyBytesObject * self,PyObject * bytes)2055 bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
2056 /*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
2057 {
2058     return do_argstrip(self, LEFTSTRIP, bytes);
2059 }
2060 
2061 /*[clinic input]
2062 bytes.rstrip
2063 
2064     bytes: object = None
2065     /
2066 
2067 Strip trailing bytes contained in the argument.
2068 
2069 If the argument is omitted or None, strip trailing ASCII whitespace.
2070 [clinic start generated code]*/
2071 
2072 static PyObject *
bytes_rstrip_impl(PyBytesObject * self,PyObject * bytes)2073 bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
2074 /*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
2075 {
2076     return do_argstrip(self, RIGHTSTRIP, bytes);
2077 }
2078 
2079 
2080 static PyObject *
bytes_count(PyBytesObject * self,PyObject * args)2081 bytes_count(PyBytesObject *self, PyObject *args)
2082 {
2083     return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2084 }
2085 
2086 
2087 /*[clinic input]
2088 bytes.translate
2089 
2090     table: object
2091         Translation table, which must be a bytes object of length 256.
2092     /
2093     delete as deletechars: object(c_default="NULL") = b''
2094 
2095 Return a copy with each character mapped by the given translation table.
2096 
2097 All characters occurring in the optional argument delete are removed.
2098 The remaining characters are mapped through the given translation table.
2099 [clinic start generated code]*/
2100 
2101 static PyObject *
bytes_translate_impl(PyBytesObject * self,PyObject * table,PyObject * deletechars)2102 bytes_translate_impl(PyBytesObject *self, PyObject *table,
2103                      PyObject *deletechars)
2104 /*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
2105 {
2106     char *input, *output;
2107     Py_buffer table_view = {NULL, NULL};
2108     Py_buffer del_table_view = {NULL, NULL};
2109     const char *table_chars;
2110     Py_ssize_t i, c, changed = 0;
2111     PyObject *input_obj = (PyObject*)self;
2112     const char *output_start, *del_table_chars=NULL;
2113     Py_ssize_t inlen, tablen, dellen = 0;
2114     PyObject *result;
2115     int trans_table[256];
2116 
2117     if (PyBytes_Check(table)) {
2118         table_chars = PyBytes_AS_STRING(table);
2119         tablen = PyBytes_GET_SIZE(table);
2120     }
2121     else if (table == Py_None) {
2122         table_chars = NULL;
2123         tablen = 256;
2124     }
2125     else {
2126         if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
2127             return NULL;
2128         table_chars = table_view.buf;
2129         tablen = table_view.len;
2130     }
2131 
2132     if (tablen != 256) {
2133         PyErr_SetString(PyExc_ValueError,
2134           "translation table must be 256 characters long");
2135         PyBuffer_Release(&table_view);
2136         return NULL;
2137     }
2138 
2139     if (deletechars != NULL) {
2140         if (PyBytes_Check(deletechars)) {
2141             del_table_chars = PyBytes_AS_STRING(deletechars);
2142             dellen = PyBytes_GET_SIZE(deletechars);
2143         }
2144         else {
2145             if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
2146                 PyBuffer_Release(&table_view);
2147                 return NULL;
2148             }
2149             del_table_chars = del_table_view.buf;
2150             dellen = del_table_view.len;
2151         }
2152     }
2153     else {
2154         del_table_chars = NULL;
2155         dellen = 0;
2156     }
2157 
2158     inlen = PyBytes_GET_SIZE(input_obj);
2159     result = PyBytes_FromStringAndSize((char *)NULL, inlen);
2160     if (result == NULL) {
2161         PyBuffer_Release(&del_table_view);
2162         PyBuffer_Release(&table_view);
2163         return NULL;
2164     }
2165     output_start = output = PyBytes_AS_STRING(result);
2166     input = PyBytes_AS_STRING(input_obj);
2167 
2168     if (dellen == 0 && table_chars != NULL) {
2169         /* If no deletions are required, use faster code */
2170         for (i = inlen; --i >= 0; ) {
2171             c = Py_CHARMASK(*input++);
2172             if (Py_CHARMASK((*output++ = table_chars[c])) != c)
2173                 changed = 1;
2174         }
2175         if (!changed && PyBytes_CheckExact(input_obj)) {
2176             Py_INCREF(input_obj);
2177             Py_DECREF(result);
2178             result = input_obj;
2179         }
2180         PyBuffer_Release(&del_table_view);
2181         PyBuffer_Release(&table_view);
2182         return result;
2183     }
2184 
2185     if (table_chars == NULL) {
2186         for (i = 0; i < 256; i++)
2187             trans_table[i] = Py_CHARMASK(i);
2188     } else {
2189         for (i = 0; i < 256; i++)
2190             trans_table[i] = Py_CHARMASK(table_chars[i]);
2191     }
2192     PyBuffer_Release(&table_view);
2193 
2194     for (i = 0; i < dellen; i++)
2195         trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
2196     PyBuffer_Release(&del_table_view);
2197 
2198     for (i = inlen; --i >= 0; ) {
2199         c = Py_CHARMASK(*input++);
2200         if (trans_table[c] != -1)
2201             if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2202                 continue;
2203         changed = 1;
2204     }
2205     if (!changed && PyBytes_CheckExact(input_obj)) {
2206         Py_DECREF(result);
2207         Py_INCREF(input_obj);
2208         return input_obj;
2209     }
2210     /* Fix the size of the resulting string */
2211     if (inlen > 0)
2212         _PyBytes_Resize(&result, output - output_start);
2213     return result;
2214 }
2215 
2216 
2217 /*[clinic input]
2218 
2219 @staticmethod
2220 bytes.maketrans
2221 
2222     frm: Py_buffer
2223     to: Py_buffer
2224     /
2225 
2226 Return a translation table useable for the bytes or bytearray translate method.
2227 
2228 The returned table will be one where each byte in frm is mapped to the byte at
2229 the same position in to.
2230 
2231 The bytes objects frm and to must be of the same length.
2232 [clinic start generated code]*/
2233 
2234 static PyObject *
bytes_maketrans_impl(Py_buffer * frm,Py_buffer * to)2235 bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2236 /*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
2237 {
2238     return _Py_bytes_maketrans(frm, to);
2239 }
2240 
2241 
2242 /*[clinic input]
2243 bytes.replace
2244 
2245     old: Py_buffer
2246     new: Py_buffer
2247     count: Py_ssize_t = -1
2248         Maximum number of occurrences to replace.
2249         -1 (the default value) means replace all occurrences.
2250     /
2251 
2252 Return a copy with all occurrences of substring old replaced by new.
2253 
2254 If the optional argument count is given, only the first count occurrences are
2255 replaced.
2256 [clinic start generated code]*/
2257 
2258 static PyObject *
bytes_replace_impl(PyBytesObject * self,Py_buffer * old,Py_buffer * new,Py_ssize_t count)2259 bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
2260                    Py_ssize_t count)
2261 /*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
2262 {
2263     return stringlib_replace((PyObject *)self,
2264                              (const char *)old->buf, old->len,
2265                              (const char *)new->buf, new->len, count);
2266 }
2267 
2268 /** End DALKE **/
2269 
2270 
2271 static PyObject *
bytes_startswith(PyBytesObject * self,PyObject * args)2272 bytes_startswith(PyBytesObject *self, PyObject *args)
2273 {
2274     return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2275 }
2276 
2277 static PyObject *
bytes_endswith(PyBytesObject * self,PyObject * args)2278 bytes_endswith(PyBytesObject *self, PyObject *args)
2279 {
2280     return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2281 }
2282 
2283 
2284 /*[clinic input]
2285 bytes.decode
2286 
2287     encoding: str(c_default="NULL") = 'utf-8'
2288         The encoding with which to decode the bytes.
2289     errors: str(c_default="NULL") = 'strict'
2290         The error handling scheme to use for the handling of decoding errors.
2291         The default is 'strict' meaning that decoding errors raise a
2292         UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2293         as well as any other name registered with codecs.register_error that
2294         can handle UnicodeDecodeErrors.
2295 
2296 Decode the bytes using the codec registered for encoding.
2297 [clinic start generated code]*/
2298 
2299 static PyObject *
bytes_decode_impl(PyBytesObject * self,const char * encoding,const char * errors)2300 bytes_decode_impl(PyBytesObject *self, const char *encoding,
2301                   const char *errors)
2302 /*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
2303 {
2304     return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
2305 }
2306 
2307 
2308 /*[clinic input]
2309 bytes.splitlines
2310 
2311     keepends: bool(accept={int}) = False
2312 
2313 Return a list of the lines in the bytes, breaking at line boundaries.
2314 
2315 Line breaks are not included in the resulting list unless keepends is given and
2316 true.
2317 [clinic start generated code]*/
2318 
2319 static PyObject *
bytes_splitlines_impl(PyBytesObject * self,int keepends)2320 bytes_splitlines_impl(PyBytesObject *self, int keepends)
2321 /*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
2322 {
2323     return stringlib_splitlines(
2324         (PyObject*) self, PyBytes_AS_STRING(self),
2325         PyBytes_GET_SIZE(self), keepends
2326         );
2327 }
2328 
2329 /*[clinic input]
2330 @classmethod
2331 bytes.fromhex
2332 
2333     string: unicode
2334     /
2335 
2336 Create a bytes object from a string of hexadecimal numbers.
2337 
2338 Spaces between two numbers are accepted.
2339 Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2340 [clinic start generated code]*/
2341 
2342 static PyObject *
bytes_fromhex_impl(PyTypeObject * type,PyObject * string)2343 bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2344 /*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
2345 {
2346     PyObject *result = _PyBytes_FromHex(string, 0);
2347     if (type != &PyBytes_Type && result != NULL) {
2348         Py_SETREF(result, PyObject_CallFunctionObjArgs((PyObject *)type,
2349                                                        result, NULL));
2350     }
2351     return result;
2352 }
2353 
2354 PyObject*
_PyBytes_FromHex(PyObject * string,int use_bytearray)2355 _PyBytes_FromHex(PyObject *string, int use_bytearray)
2356 {
2357     char *buf;
2358     Py_ssize_t hexlen, invalid_char;
2359     unsigned int top, bot;
2360     Py_UCS1 *str, *end;
2361     _PyBytesWriter writer;
2362 
2363     _PyBytesWriter_Init(&writer);
2364     writer.use_bytearray = use_bytearray;
2365 
2366     assert(PyUnicode_Check(string));
2367     if (PyUnicode_READY(string))
2368         return NULL;
2369     hexlen = PyUnicode_GET_LENGTH(string);
2370 
2371     if (!PyUnicode_IS_ASCII(string)) {
2372         void *data = PyUnicode_DATA(string);
2373         unsigned int kind = PyUnicode_KIND(string);
2374         Py_ssize_t i;
2375 
2376         /* search for the first non-ASCII character */
2377         for (i = 0; i < hexlen; i++) {
2378             if (PyUnicode_READ(kind, data, i) >= 128)
2379                 break;
2380         }
2381         invalid_char = i;
2382         goto error;
2383     }
2384 
2385     assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2386     str = PyUnicode_1BYTE_DATA(string);
2387 
2388     /* This overestimates if there are spaces */
2389     buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2390     if (buf == NULL)
2391         return NULL;
2392 
2393     end = str + hexlen;
2394     while (str < end) {
2395         /* skip over spaces in the input */
2396         if (Py_ISSPACE(*str)) {
2397             do {
2398                 str++;
2399             } while (Py_ISSPACE(*str));
2400             if (str >= end)
2401                 break;
2402         }
2403 
2404         top = _PyLong_DigitValue[*str];
2405         if (top >= 16) {
2406             invalid_char = str - PyUnicode_1BYTE_DATA(string);
2407             goto error;
2408         }
2409         str++;
2410 
2411         bot = _PyLong_DigitValue[*str];
2412         if (bot >= 16) {
2413             invalid_char = str - PyUnicode_1BYTE_DATA(string);
2414             goto error;
2415         }
2416         str++;
2417 
2418         *buf++ = (unsigned char)((top << 4) + bot);
2419     }
2420 
2421     return _PyBytesWriter_Finish(&writer, buf);
2422 
2423   error:
2424     PyErr_Format(PyExc_ValueError,
2425                  "non-hexadecimal number found in "
2426                  "fromhex() arg at position %zd", invalid_char);
2427     _PyBytesWriter_Dealloc(&writer);
2428     return NULL;
2429 }
2430 
2431 PyDoc_STRVAR(hex__doc__,
2432 "B.hex() -> string\n\
2433 \n\
2434 Create a string of hexadecimal numbers from a bytes object.\n\
2435 Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
2436 
2437 static PyObject *
bytes_hex(PyBytesObject * self)2438 bytes_hex(PyBytesObject *self)
2439 {
2440     char* argbuf = PyBytes_AS_STRING(self);
2441     Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2442     return _Py_strhex(argbuf, arglen);
2443 }
2444 
2445 static PyObject *
bytes_getnewargs(PyBytesObject * v)2446 bytes_getnewargs(PyBytesObject *v)
2447 {
2448     return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
2449 }
2450 
2451 
2452 static PyMethodDef
2453 bytes_methods[] = {
2454     {"__getnewargs__",          (PyCFunction)bytes_getnewargs,  METH_NOARGS},
2455     {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2456      _Py_capitalize__doc__},
2457     {"center", (PyCFunction)stringlib_center, METH_VARARGS,
2458      _Py_center__doc__},
2459     {"count", (PyCFunction)bytes_count, METH_VARARGS,
2460      _Py_count__doc__},
2461     BYTES_DECODE_METHODDEF
2462     {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2463      _Py_endswith__doc__},
2464     {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
2465      _Py_expandtabs__doc__},
2466     {"find", (PyCFunction)bytes_find, METH_VARARGS,
2467      _Py_find__doc__},
2468     BYTES_FROMHEX_METHODDEF
2469     {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
2470     {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
2471     {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2472      _Py_isalnum__doc__},
2473     {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2474      _Py_isalpha__doc__},
2475     {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2476      _Py_isdigit__doc__},
2477     {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2478      _Py_islower__doc__},
2479     {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2480      _Py_isspace__doc__},
2481     {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2482      _Py_istitle__doc__},
2483     {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2484      _Py_isupper__doc__},
2485     BYTES_JOIN_METHODDEF
2486     {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, _Py_ljust__doc__},
2487     {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2488     BYTES_LSTRIP_METHODDEF
2489     BYTES_MAKETRANS_METHODDEF
2490     BYTES_PARTITION_METHODDEF
2491     BYTES_REPLACE_METHODDEF
2492     {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2493     {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
2494     {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, _Py_rjust__doc__},
2495     BYTES_RPARTITION_METHODDEF
2496     BYTES_RSPLIT_METHODDEF
2497     BYTES_RSTRIP_METHODDEF
2498     BYTES_SPLIT_METHODDEF
2499     BYTES_SPLITLINES_METHODDEF
2500     {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2501      _Py_startswith__doc__},
2502     BYTES_STRIP_METHODDEF
2503     {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2504      _Py_swapcase__doc__},
2505     {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2506     BYTES_TRANSLATE_METHODDEF
2507     {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2508     {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, _Py_zfill__doc__},
2509     {NULL,     NULL}                         /* sentinel */
2510 };
2511 
2512 static PyObject *
bytes_mod(PyObject * self,PyObject * arg)2513 bytes_mod(PyObject *self, PyObject *arg)
2514 {
2515     if (!PyBytes_Check(self)) {
2516         Py_RETURN_NOTIMPLEMENTED;
2517     }
2518     return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2519                              arg, 0);
2520 }
2521 
2522 static PyNumberMethods bytes_as_number = {
2523     0,              /*nb_add*/
2524     0,              /*nb_subtract*/
2525     0,              /*nb_multiply*/
2526     bytes_mod,      /*nb_remainder*/
2527 };
2528 
2529 static PyObject *
2530 bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2531 
2532 static PyObject *
bytes_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2533 bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2534 {
2535     PyObject *x = NULL;
2536     const char *encoding = NULL;
2537     const char *errors = NULL;
2538     PyObject *new = NULL;
2539     PyObject *func;
2540     Py_ssize_t size;
2541     static char *kwlist[] = {"source", "encoding", "errors", 0};
2542     _Py_IDENTIFIER(__bytes__);
2543 
2544     if (type != &PyBytes_Type)
2545         return bytes_subtype_new(type, args, kwds);
2546     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2547                                      &encoding, &errors))
2548         return NULL;
2549     if (x == NULL) {
2550         if (encoding != NULL || errors != NULL) {
2551             PyErr_SetString(PyExc_TypeError,
2552                             "encoding or errors without sequence "
2553                             "argument");
2554             return NULL;
2555         }
2556         return PyBytes_FromStringAndSize(NULL, 0);
2557     }
2558 
2559     if (encoding != NULL) {
2560         /* Encode via the codec registry */
2561         if (!PyUnicode_Check(x)) {
2562             PyErr_SetString(PyExc_TypeError,
2563                             "encoding without a string argument");
2564             return NULL;
2565         }
2566         new = PyUnicode_AsEncodedString(x, encoding, errors);
2567         if (new == NULL)
2568             return NULL;
2569         assert(PyBytes_Check(new));
2570         return new;
2571     }
2572 
2573     if (errors != NULL) {
2574         PyErr_SetString(PyExc_TypeError,
2575                         PyUnicode_Check(x) ?
2576                         "string argument without an encoding" :
2577                         "errors without a string argument");
2578         return NULL;
2579     }
2580 
2581     /* We'd like to call PyObject_Bytes here, but we need to check for an
2582        integer argument before deferring to PyBytes_FromObject, something
2583        PyObject_Bytes doesn't do. */
2584     func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2585     if (func != NULL) {
2586         new = _PyObject_CallNoArg(func);
2587         Py_DECREF(func);
2588         if (new == NULL)
2589             return NULL;
2590         if (!PyBytes_Check(new)) {
2591             PyErr_Format(PyExc_TypeError,
2592                          "__bytes__ returned non-bytes (type %.200s)",
2593                          Py_TYPE(new)->tp_name);
2594             Py_DECREF(new);
2595             return NULL;
2596         }
2597         return new;
2598     }
2599     else if (PyErr_Occurred())
2600         return NULL;
2601 
2602     if (PyUnicode_Check(x)) {
2603         PyErr_SetString(PyExc_TypeError,
2604                         "string argument without an encoding");
2605         return NULL;
2606     }
2607     /* Is it an integer? */
2608     if (PyIndex_Check(x)) {
2609         size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2610         if (size == -1 && PyErr_Occurred()) {
2611             if (PyErr_ExceptionMatches(PyExc_OverflowError))
2612                 return NULL;
2613             PyErr_Clear();  /* fall through */
2614         }
2615         else {
2616             if (size < 0) {
2617                 PyErr_SetString(PyExc_ValueError, "negative count");
2618                 return NULL;
2619             }
2620             new = _PyBytes_FromSize(size, 1);
2621             if (new == NULL)
2622                 return NULL;
2623             return new;
2624         }
2625     }
2626 
2627     return PyBytes_FromObject(x);
2628 }
2629 
2630 static PyObject*
_PyBytes_FromBuffer(PyObject * x)2631 _PyBytes_FromBuffer(PyObject *x)
2632 {
2633     PyObject *new;
2634     Py_buffer view;
2635 
2636     if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2637         return NULL;
2638 
2639     new = PyBytes_FromStringAndSize(NULL, view.len);
2640     if (!new)
2641         goto fail;
2642     if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2643                 &view, view.len, 'C') < 0)
2644         goto fail;
2645     PyBuffer_Release(&view);
2646     return new;
2647 
2648 fail:
2649     Py_XDECREF(new);
2650     PyBuffer_Release(&view);
2651     return NULL;
2652 }
2653 
2654 #define _PyBytes_FROM_LIST_BODY(x, GET_ITEM)                                \
2655     do {                                                                    \
2656         PyObject *bytes;                                                    \
2657         Py_ssize_t i;                                                       \
2658         Py_ssize_t value;                                                   \
2659         char *str;                                                          \
2660         PyObject *item;                                                     \
2661                                                                             \
2662         bytes = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));                \
2663         if (bytes == NULL)                                                  \
2664             return NULL;                                                    \
2665         str = ((PyBytesObject *)bytes)->ob_sval;                            \
2666                                                                             \
2667         for (i = 0; i < Py_SIZE(x); i++) {                                  \
2668             item = GET_ITEM((x), i);                                        \
2669             value = PyNumber_AsSsize_t(item, NULL);                         \
2670             if (value == -1 && PyErr_Occurred())                            \
2671                 goto error;                                                 \
2672                                                                             \
2673             if (value < 0 || value >= 256) {                                \
2674                 PyErr_SetString(PyExc_ValueError,                           \
2675                                 "bytes must be in range(0, 256)");          \
2676                 goto error;                                                 \
2677             }                                                               \
2678             *str++ = (char) value;                                          \
2679         }                                                                   \
2680         return bytes;                                                       \
2681                                                                             \
2682     error:                                                                  \
2683         Py_DECREF(bytes);                                                   \
2684         return NULL;                                                        \
2685     } while (0)
2686 
2687 static PyObject*
_PyBytes_FromList(PyObject * x)2688 _PyBytes_FromList(PyObject *x)
2689 {
2690     _PyBytes_FROM_LIST_BODY(x, PyList_GET_ITEM);
2691 }
2692 
2693 static PyObject*
_PyBytes_FromTuple(PyObject * x)2694 _PyBytes_FromTuple(PyObject *x)
2695 {
2696     _PyBytes_FROM_LIST_BODY(x, PyTuple_GET_ITEM);
2697 }
2698 
2699 static PyObject *
_PyBytes_FromIterator(PyObject * it,PyObject * x)2700 _PyBytes_FromIterator(PyObject *it, PyObject *x)
2701 {
2702     char *str;
2703     Py_ssize_t i, size;
2704     _PyBytesWriter writer;
2705 
2706     /* For iterator version, create a string object and resize as needed */
2707     size = PyObject_LengthHint(x, 64);
2708     if (size == -1 && PyErr_Occurred())
2709         return NULL;
2710 
2711     _PyBytesWriter_Init(&writer);
2712     str = _PyBytesWriter_Alloc(&writer, size);
2713     if (str == NULL)
2714         return NULL;
2715     writer.overallocate = 1;
2716     size = writer.allocated;
2717 
2718     /* Run the iterator to exhaustion */
2719     for (i = 0; ; i++) {
2720         PyObject *item;
2721         Py_ssize_t value;
2722 
2723         /* Get the next item */
2724         item = PyIter_Next(it);
2725         if (item == NULL) {
2726             if (PyErr_Occurred())
2727                 goto error;
2728             break;
2729         }
2730 
2731         /* Interpret it as an int (__index__) */
2732         value = PyNumber_AsSsize_t(item, NULL);
2733         Py_DECREF(item);
2734         if (value == -1 && PyErr_Occurred())
2735             goto error;
2736 
2737         /* Range check */
2738         if (value < 0 || value >= 256) {
2739             PyErr_SetString(PyExc_ValueError,
2740                             "bytes must be in range(0, 256)");
2741             goto error;
2742         }
2743 
2744         /* Append the byte */
2745         if (i >= size) {
2746             str = _PyBytesWriter_Resize(&writer, str, size+1);
2747             if (str == NULL)
2748                 return NULL;
2749             size = writer.allocated;
2750         }
2751         *str++ = (char) value;
2752     }
2753 
2754     return _PyBytesWriter_Finish(&writer, str);
2755 
2756   error:
2757     _PyBytesWriter_Dealloc(&writer);
2758     return NULL;
2759 }
2760 
2761 PyObject *
PyBytes_FromObject(PyObject * x)2762 PyBytes_FromObject(PyObject *x)
2763 {
2764     PyObject *it, *result;
2765 
2766     if (x == NULL) {
2767         PyErr_BadInternalCall();
2768         return NULL;
2769     }
2770 
2771     if (PyBytes_CheckExact(x)) {
2772         Py_INCREF(x);
2773         return x;
2774     }
2775 
2776     /* Use the modern buffer interface */
2777     if (PyObject_CheckBuffer(x))
2778         return _PyBytes_FromBuffer(x);
2779 
2780     if (PyList_CheckExact(x))
2781         return _PyBytes_FromList(x);
2782 
2783     if (PyTuple_CheckExact(x))
2784         return _PyBytes_FromTuple(x);
2785 
2786     if (!PyUnicode_Check(x)) {
2787         it = PyObject_GetIter(x);
2788         if (it != NULL) {
2789             result = _PyBytes_FromIterator(it, x);
2790             Py_DECREF(it);
2791             return result;
2792         }
2793     }
2794 
2795     PyErr_Format(PyExc_TypeError,
2796                  "cannot convert '%.200s' object to bytes",
2797                  x->ob_type->tp_name);
2798     return NULL;
2799 }
2800 
2801 static PyObject *
bytes_subtype_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2802 bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2803 {
2804     PyObject *tmp, *pnew;
2805     Py_ssize_t n;
2806 
2807     assert(PyType_IsSubtype(type, &PyBytes_Type));
2808     tmp = bytes_new(&PyBytes_Type, args, kwds);
2809     if (tmp == NULL)
2810         return NULL;
2811     assert(PyBytes_Check(tmp));
2812     n = PyBytes_GET_SIZE(tmp);
2813     pnew = type->tp_alloc(type, n);
2814     if (pnew != NULL) {
2815         memcpy(PyBytes_AS_STRING(pnew),
2816                   PyBytes_AS_STRING(tmp), n+1);
2817         ((PyBytesObject *)pnew)->ob_shash =
2818             ((PyBytesObject *)tmp)->ob_shash;
2819     }
2820     Py_DECREF(tmp);
2821     return pnew;
2822 }
2823 
2824 PyDoc_STRVAR(bytes_doc,
2825 "bytes(iterable_of_ints) -> bytes\n\
2826 bytes(string, encoding[, errors]) -> bytes\n\
2827 bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
2828 bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2829 bytes() -> empty bytes object\n\
2830 \n\
2831 Construct an immutable array of bytes from:\n\
2832   - an iterable yielding integers in range(256)\n\
2833   - a text string encoded using the specified encoding\n\
2834   - any object implementing the buffer API.\n\
2835   - an integer");
2836 
2837 static PyObject *bytes_iter(PyObject *seq);
2838 
2839 PyTypeObject PyBytes_Type = {
2840     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2841     "bytes",
2842     PyBytesObject_SIZE,
2843     sizeof(char),
2844     bytes_dealloc,                      /* tp_dealloc */
2845     0,                                          /* tp_print */
2846     0,                                          /* tp_getattr */
2847     0,                                          /* tp_setattr */
2848     0,                                          /* tp_reserved */
2849     (reprfunc)bytes_repr,                       /* tp_repr */
2850     &bytes_as_number,                           /* tp_as_number */
2851     &bytes_as_sequence,                         /* tp_as_sequence */
2852     &bytes_as_mapping,                          /* tp_as_mapping */
2853     (hashfunc)bytes_hash,                       /* tp_hash */
2854     0,                                          /* tp_call */
2855     bytes_str,                                  /* tp_str */
2856     PyObject_GenericGetAttr,                    /* tp_getattro */
2857     0,                                          /* tp_setattro */
2858     &bytes_as_buffer,                           /* tp_as_buffer */
2859     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2860         Py_TPFLAGS_BYTES_SUBCLASS,              /* tp_flags */
2861     bytes_doc,                                  /* tp_doc */
2862     0,                                          /* tp_traverse */
2863     0,                                          /* tp_clear */
2864     (richcmpfunc)bytes_richcompare,             /* tp_richcompare */
2865     0,                                          /* tp_weaklistoffset */
2866     bytes_iter,                                 /* tp_iter */
2867     0,                                          /* tp_iternext */
2868     bytes_methods,                              /* tp_methods */
2869     0,                                          /* tp_members */
2870     0,                                          /* tp_getset */
2871     &PyBaseObject_Type,                         /* tp_base */
2872     0,                                          /* tp_dict */
2873     0,                                          /* tp_descr_get */
2874     0,                                          /* tp_descr_set */
2875     0,                                          /* tp_dictoffset */
2876     0,                                          /* tp_init */
2877     0,                                          /* tp_alloc */
2878     bytes_new,                                  /* tp_new */
2879     PyObject_Del,                               /* tp_free */
2880 };
2881 
2882 void
PyBytes_Concat(PyObject ** pv,PyObject * w)2883 PyBytes_Concat(PyObject **pv, PyObject *w)
2884 {
2885     assert(pv != NULL);
2886     if (*pv == NULL)
2887         return;
2888     if (w == NULL) {
2889         Py_CLEAR(*pv);
2890         return;
2891     }
2892 
2893     if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2894         /* Only one reference, so we can resize in place */
2895         Py_ssize_t oldsize;
2896         Py_buffer wb;
2897 
2898         wb.len = -1;
2899         if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
2900             PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2901                          Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2902             Py_CLEAR(*pv);
2903             return;
2904         }
2905 
2906         oldsize = PyBytes_GET_SIZE(*pv);
2907         if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2908             PyErr_NoMemory();
2909             goto error;
2910         }
2911         if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2912             goto error;
2913 
2914         memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2915         PyBuffer_Release(&wb);
2916         return;
2917 
2918       error:
2919         PyBuffer_Release(&wb);
2920         Py_CLEAR(*pv);
2921         return;
2922     }
2923 
2924     else {
2925         /* Multiple references, need to create new object */
2926         PyObject *v;
2927         v = bytes_concat(*pv, w);
2928         Py_SETREF(*pv, v);
2929     }
2930 }
2931 
2932 void
PyBytes_ConcatAndDel(PyObject ** pv,PyObject * w)2933 PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
2934 {
2935     PyBytes_Concat(pv, w);
2936     Py_XDECREF(w);
2937 }
2938 
2939 
2940 /* The following function breaks the notion that bytes are immutable:
2941    it changes the size of a bytes object.  We get away with this only if there
2942    is only one module referencing the object.  You can also think of it
2943    as creating a new bytes object and destroying the old one, only
2944    more efficiently.  In any case, don't use this if the bytes object may
2945    already be known to some other part of the code...
2946    Note that if there's not enough memory to resize the bytes object, the
2947    original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
2948    memory" exception is set, and -1 is returned.  Else (on success) 0 is
2949    returned, and the value in *pv may or may not be the same as on input.
2950    As always, an extra byte is allocated for a trailing \0 byte (newsize
2951    does *not* include that), and a trailing \0 byte is stored.
2952 */
2953 
2954 int
_PyBytes_Resize(PyObject ** pv,Py_ssize_t newsize)2955 _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2956 {
2957     PyObject *v;
2958     PyBytesObject *sv;
2959     v = *pv;
2960     if (!PyBytes_Check(v) || newsize < 0) {
2961         goto error;
2962     }
2963     if (Py_SIZE(v) == newsize) {
2964         /* return early if newsize equals to v->ob_size */
2965         return 0;
2966     }
2967     if (Py_REFCNT(v) != 1) {
2968         goto error;
2969     }
2970     /* XXX UNREF/NEWREF interface should be more symmetrical */
2971     _Py_DEC_REFTOTAL;
2972     _Py_ForgetReference(v);
2973     *pv = (PyObject *)
2974         PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
2975     if (*pv == NULL) {
2976         PyObject_Del(v);
2977         PyErr_NoMemory();
2978         return -1;
2979     }
2980     _Py_NewReference(*pv);
2981     sv = (PyBytesObject *) *pv;
2982     Py_SIZE(sv) = newsize;
2983     sv->ob_sval[newsize] = '\0';
2984     sv->ob_shash = -1;          /* invalidate cached hash value */
2985     return 0;
2986 error:
2987     *pv = 0;
2988     Py_DECREF(v);
2989     PyErr_BadInternalCall();
2990     return -1;
2991 }
2992 
2993 void
PyBytes_Fini(void)2994 PyBytes_Fini(void)
2995 {
2996     int i;
2997     for (i = 0; i < UCHAR_MAX + 1; i++)
2998         Py_CLEAR(characters[i]);
2999     Py_CLEAR(nullstring);
3000 }
3001 
3002 /*********************** Bytes Iterator ****************************/
3003 
3004 typedef struct {
3005     PyObject_HEAD
3006     Py_ssize_t it_index;
3007     PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
3008 } striterobject;
3009 
3010 static void
striter_dealloc(striterobject * it)3011 striter_dealloc(striterobject *it)
3012 {
3013     _PyObject_GC_UNTRACK(it);
3014     Py_XDECREF(it->it_seq);
3015     PyObject_GC_Del(it);
3016 }
3017 
3018 static int
striter_traverse(striterobject * it,visitproc visit,void * arg)3019 striter_traverse(striterobject *it, visitproc visit, void *arg)
3020 {
3021     Py_VISIT(it->it_seq);
3022     return 0;
3023 }
3024 
3025 static PyObject *
striter_next(striterobject * it)3026 striter_next(striterobject *it)
3027 {
3028     PyBytesObject *seq;
3029     PyObject *item;
3030 
3031     assert(it != NULL);
3032     seq = it->it_seq;
3033     if (seq == NULL)
3034         return NULL;
3035     assert(PyBytes_Check(seq));
3036 
3037     if (it->it_index < PyBytes_GET_SIZE(seq)) {
3038         item = PyLong_FromLong(
3039             (unsigned char)seq->ob_sval[it->it_index]);
3040         if (item != NULL)
3041             ++it->it_index;
3042         return item;
3043     }
3044 
3045     it->it_seq = NULL;
3046     Py_DECREF(seq);
3047     return NULL;
3048 }
3049 
3050 static PyObject *
striter_len(striterobject * it)3051 striter_len(striterobject *it)
3052 {
3053     Py_ssize_t len = 0;
3054     if (it->it_seq)
3055         len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3056     return PyLong_FromSsize_t(len);
3057 }
3058 
3059 PyDoc_STRVAR(length_hint_doc,
3060              "Private method returning an estimate of len(list(it)).");
3061 
3062 static PyObject *
striter_reduce(striterobject * it)3063 striter_reduce(striterobject *it)
3064 {
3065     if (it->it_seq != NULL) {
3066         return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
3067                              it->it_seq, it->it_index);
3068     } else {
3069         return Py_BuildValue("N(())", _PyObject_GetBuiltin("iter"));
3070     }
3071 }
3072 
3073 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3074 
3075 static PyObject *
striter_setstate(striterobject * it,PyObject * state)3076 striter_setstate(striterobject *it, PyObject *state)
3077 {
3078     Py_ssize_t index = PyLong_AsSsize_t(state);
3079     if (index == -1 && PyErr_Occurred())
3080         return NULL;
3081     if (it->it_seq != NULL) {
3082         if (index < 0)
3083             index = 0;
3084         else if (index > PyBytes_GET_SIZE(it->it_seq))
3085             index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3086         it->it_index = index;
3087     }
3088     Py_RETURN_NONE;
3089 }
3090 
3091 PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3092 
3093 static PyMethodDef striter_methods[] = {
3094     {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3095      length_hint_doc},
3096     {"__reduce__",      (PyCFunction)striter_reduce, METH_NOARGS,
3097      reduce_doc},
3098     {"__setstate__",    (PyCFunction)striter_setstate, METH_O,
3099      setstate_doc},
3100     {NULL,              NULL}           /* sentinel */
3101 };
3102 
3103 PyTypeObject PyBytesIter_Type = {
3104     PyVarObject_HEAD_INIT(&PyType_Type, 0)
3105     "bytes_iterator",                           /* tp_name */
3106     sizeof(striterobject),                      /* tp_basicsize */
3107     0,                                          /* tp_itemsize */
3108     /* methods */
3109     (destructor)striter_dealloc,                /* tp_dealloc */
3110     0,                                          /* tp_print */
3111     0,                                          /* tp_getattr */
3112     0,                                          /* tp_setattr */
3113     0,                                          /* tp_reserved */
3114     0,                                          /* tp_repr */
3115     0,                                          /* tp_as_number */
3116     0,                                          /* tp_as_sequence */
3117     0,                                          /* tp_as_mapping */
3118     0,                                          /* tp_hash */
3119     0,                                          /* tp_call */
3120     0,                                          /* tp_str */
3121     PyObject_GenericGetAttr,                    /* tp_getattro */
3122     0,                                          /* tp_setattro */
3123     0,                                          /* tp_as_buffer */
3124     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3125     0,                                          /* tp_doc */
3126     (traverseproc)striter_traverse,     /* tp_traverse */
3127     0,                                          /* tp_clear */
3128     0,                                          /* tp_richcompare */
3129     0,                                          /* tp_weaklistoffset */
3130     PyObject_SelfIter,                          /* tp_iter */
3131     (iternextfunc)striter_next,                 /* tp_iternext */
3132     striter_methods,                            /* tp_methods */
3133     0,
3134 };
3135 
3136 static PyObject *
bytes_iter(PyObject * seq)3137 bytes_iter(PyObject *seq)
3138 {
3139     striterobject *it;
3140 
3141     if (!PyBytes_Check(seq)) {
3142         PyErr_BadInternalCall();
3143         return NULL;
3144     }
3145     it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3146     if (it == NULL)
3147         return NULL;
3148     it->it_index = 0;
3149     Py_INCREF(seq);
3150     it->it_seq = (PyBytesObject *)seq;
3151     _PyObject_GC_TRACK(it);
3152     return (PyObject *)it;
3153 }
3154 
3155 
3156 /* _PyBytesWriter API */
3157 
/* OVERALLOCATE_FACTOR is a divisor: when overallocation is enabled, the
   writer reserves an extra (size / OVERALLOCATE_FACTOR) bytes. */
#if defined(MS_WINDOWS)
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3165 
3166 void
_PyBytesWriter_Init(_PyBytesWriter * writer)3167 _PyBytesWriter_Init(_PyBytesWriter *writer)
3168 {
3169     /* Set all attributes before small_buffer to 0 */
3170     memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
3171 #ifdef Py_DEBUG
3172     memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
3173 #endif
3174 }
3175 
3176 void
_PyBytesWriter_Dealloc(_PyBytesWriter * writer)3177 _PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3178 {
3179     Py_CLEAR(writer->buffer);
3180 }
3181 
3182 Py_LOCAL_INLINE(char*)
_PyBytesWriter_AsString(_PyBytesWriter * writer)3183 _PyBytesWriter_AsString(_PyBytesWriter *writer)
3184 {
3185     if (writer->use_small_buffer) {
3186         assert(writer->buffer == NULL);
3187         return writer->small_buffer;
3188     }
3189     else if (writer->use_bytearray) {
3190         assert(writer->buffer != NULL);
3191         return PyByteArray_AS_STRING(writer->buffer);
3192     }
3193     else {
3194         assert(writer->buffer != NULL);
3195         return PyBytes_AS_STRING(writer->buffer);
3196     }
3197 }
3198 
3199 Py_LOCAL_INLINE(Py_ssize_t)
_PyBytesWriter_GetSize(_PyBytesWriter * writer,char * str)3200 _PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
3201 {
3202     char *start = _PyBytesWriter_AsString(writer);
3203     assert(str != NULL);
3204     assert(str >= start);
3205     assert(str - start <= writer->allocated);
3206     return str - start;
3207 }
3208 
3209 Py_LOCAL_INLINE(void)
_PyBytesWriter_CheckConsistency(_PyBytesWriter * writer,char * str)3210 _PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3211 {
3212 #ifdef Py_DEBUG
3213     char *start, *end;
3214 
3215     if (writer->use_small_buffer) {
3216         assert(writer->buffer == NULL);
3217     }
3218     else {
3219         assert(writer->buffer != NULL);
3220         if (writer->use_bytearray)
3221             assert(PyByteArray_CheckExact(writer->buffer));
3222         else
3223             assert(PyBytes_CheckExact(writer->buffer));
3224         assert(Py_REFCNT(writer->buffer) == 1);
3225     }
3226 
3227     if (writer->use_bytearray) {
3228         /* bytearray has its own overallocation algorithm,
3229            writer overallocation must be disabled */
3230         assert(!writer->overallocate);
3231     }
3232 
3233     assert(0 <= writer->allocated);
3234     assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
3235     /* the last byte must always be null */
3236     start = _PyBytesWriter_AsString(writer);
3237     assert(start[writer->allocated] == 0);
3238 
3239     end = start + writer->allocated;
3240     assert(str != NULL);
3241     assert(start <= str && str <= end);
3242 #endif
3243 }
3244 
3245 void*
_PyBytesWriter_Resize(_PyBytesWriter * writer,void * str,Py_ssize_t size)3246 _PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3247 {
3248     Py_ssize_t allocated, pos;
3249 
3250     _PyBytesWriter_CheckConsistency(writer, str);
3251     assert(writer->allocated < size);
3252 
3253     allocated = size;
3254     if (writer->overallocate
3255         && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3256         /* overallocate to limit the number of realloc() */
3257         allocated += allocated / OVERALLOCATE_FACTOR;
3258     }
3259 
3260     pos = _PyBytesWriter_GetSize(writer, str);
3261     if (!writer->use_small_buffer) {
3262         if (writer->use_bytearray) {
3263             if (PyByteArray_Resize(writer->buffer, allocated))
3264                 goto error;
3265             /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3266                but we cannot use ob_alloc because bytes may need to be moved
3267                to use the whole buffer. bytearray uses an internal optimization
3268                to avoid moving or copying bytes when bytes are removed at the
3269                beginning (ex: del bytearray[:1]). */
3270         }
3271         else {
3272             if (_PyBytes_Resize(&writer->buffer, allocated))
3273                 goto error;
3274         }
3275     }
3276     else {
3277         /* convert from stack buffer to bytes object buffer */
3278         assert(writer->buffer == NULL);
3279 
3280         if (writer->use_bytearray)
3281             writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3282         else
3283             writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
3284         if (writer->buffer == NULL)
3285             goto error;
3286 
3287         if (pos != 0) {
3288             char *dest;
3289             if (writer->use_bytearray)
3290                 dest = PyByteArray_AS_STRING(writer->buffer);
3291             else
3292                 dest = PyBytes_AS_STRING(writer->buffer);
3293             memcpy(dest,
3294                       writer->small_buffer,
3295                       pos);
3296         }
3297 
3298         writer->use_small_buffer = 0;
3299 #ifdef Py_DEBUG
3300         memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
3301 #endif
3302     }
3303     writer->allocated = allocated;
3304 
3305     str = _PyBytesWriter_AsString(writer) + pos;
3306     _PyBytesWriter_CheckConsistency(writer, str);
3307     return str;
3308 
3309 error:
3310     _PyBytesWriter_Dealloc(writer);
3311     return NULL;
3312 }
3313 
3314 void*
_PyBytesWriter_Prepare(_PyBytesWriter * writer,void * str,Py_ssize_t size)3315 _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3316 {
3317     Py_ssize_t new_min_size;
3318 
3319     _PyBytesWriter_CheckConsistency(writer, str);
3320     assert(size >= 0);
3321 
3322     if (size == 0) {
3323         /* nothing to do */
3324         return str;
3325     }
3326 
3327     if (writer->min_size > PY_SSIZE_T_MAX - size) {
3328         PyErr_NoMemory();
3329         _PyBytesWriter_Dealloc(writer);
3330         return NULL;
3331     }
3332     new_min_size = writer->min_size + size;
3333 
3334     if (new_min_size > writer->allocated)
3335         str = _PyBytesWriter_Resize(writer, str, new_min_size);
3336 
3337     writer->min_size = new_min_size;
3338     return str;
3339 }
3340 
3341 /* Allocate the buffer to write size bytes.
3342    Return the pointer to the beginning of buffer data.
3343    Raise an exception and return NULL on error. */
3344 void*
_PyBytesWriter_Alloc(_PyBytesWriter * writer,Py_ssize_t size)3345 _PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3346 {
3347     /* ensure that _PyBytesWriter_Alloc() is only called once */
3348     assert(writer->min_size == 0 && writer->buffer == NULL);
3349     assert(size >= 0);
3350 
3351     writer->use_small_buffer = 1;
3352 #ifdef Py_DEBUG
3353     writer->allocated = sizeof(writer->small_buffer) - 1;
3354     /* In debug mode, don't use the full small buffer because it is less
3355        efficient than bytes and bytearray objects to detect buffer underflow
3356        and buffer overflow. Use 10 bytes of the small buffer to test also
3357        code using the smaller buffer in debug mode.
3358 
3359        Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3360        in debug mode to also be able to detect stack overflow when running
3361        tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3362        if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3363        stack overflow. */
3364     writer->allocated = Py_MIN(writer->allocated, 10);
3365     /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3366        to detect buffer overflow */
3367     writer->small_buffer[writer->allocated] = 0;
3368 #else
3369     writer->allocated = sizeof(writer->small_buffer);
3370 #endif
3371     return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
3372 }
3373 
3374 PyObject *
_PyBytesWriter_Finish(_PyBytesWriter * writer,void * str)3375 _PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
3376 {
3377     Py_ssize_t size;
3378     PyObject *result;
3379 
3380     _PyBytesWriter_CheckConsistency(writer, str);
3381 
3382     size = _PyBytesWriter_GetSize(writer, str);
3383     if (size == 0 && !writer->use_bytearray) {
3384         Py_CLEAR(writer->buffer);
3385         /* Get the empty byte string singleton */
3386         result = PyBytes_FromStringAndSize(NULL, 0);
3387     }
3388     else if (writer->use_small_buffer) {
3389         if (writer->use_bytearray) {
3390             result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3391         }
3392         else {
3393             result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3394         }
3395     }
3396     else {
3397         result = writer->buffer;
3398         writer->buffer = NULL;
3399 
3400         if (size != writer->allocated) {
3401             if (writer->use_bytearray) {
3402                 if (PyByteArray_Resize(result, size)) {
3403                     Py_DECREF(result);
3404                     return NULL;
3405                 }
3406             }
3407             else {
3408                 if (_PyBytes_Resize(&result, size)) {
3409                     assert(result == NULL);
3410                     return NULL;
3411                 }
3412             }
3413         }
3414     }
3415     return result;
3416 }
3417 
3418 void*
_PyBytesWriter_WriteBytes(_PyBytesWriter * writer,void * ptr,const void * bytes,Py_ssize_t size)3419 _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
3420                           const void *bytes, Py_ssize_t size)
3421 {
3422     char *str = (char *)ptr;
3423 
3424     str = _PyBytesWriter_Prepare(writer, str, size);
3425     if (str == NULL)
3426         return NULL;
3427 
3428     memcpy(str, bytes, size);
3429     str += size;
3430 
3431     return str;
3432 }
3433