1 /* bytes object implementation */
2 
3 #define PY_SSIZE_T_CLEAN
4 
5 #include "Python.h"
6 #include "pycore_abstract.h"      // _PyIndex_Check()
7 #include "pycore_bytesobject.h"   // _PyBytes_Find(), _PyBytes_Repeat()
8 #include "pycore_bytes_methods.h" // _Py_bytes_startswith()
9 #include "pycore_call.h"          // _PyObject_CallNoArgs()
10 #include "pycore_format.h"        // F_LJUST
11 #include "pycore_global_objects.h"  // _Py_GET_GLOBAL_OBJECT()
12 #include "pycore_initconfig.h"    // _PyStatus_OK()
13 #include "pycore_long.h"          // _PyLong_DigitValue
14 #include "pycore_object.h"        // _PyObject_GC_TRACK
15 #include "pycore_pymem.h"         // PYMEM_CLEANBYTE
16 #include "pycore_strhex.h"        // _Py_strhex_with_sep()
17 
18 #include <stddef.h>
19 
20 /*[clinic input]
21 class bytes "PyBytesObject *" "&PyBytes_Type"
22 [clinic start generated code]*/
23 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
24 
25 #include "clinic/bytesobject.c.h"
26 
27 /* PyBytesObject_SIZE gives the basic size of a bytes object; any memory allocation
28    for a bytes object of length n should request PyBytesObject_SIZE + n bytes.
29 
30    Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
31    3 or 7 bytes per bytes object allocation on a typical system.
32 */
33 #define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
34 
35 /* Forward declaration */
36 Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
37                                                    char *str);
38 
39 
/* Shorthands for the cached bytes objects reached through _Py_SINGLETON():
   CHARACTERS is the table of one-byte objects, indexed by byte value (the
   callers in this file mask the index with 255), and EMPTY is the address
   of the empty bytes object. */
#define CHARACTERS _Py_SINGLETON(bytes_characters)
/* Pointer to the cached one-byte object for byte value `ch`.
   The expansion is a plain parenthesized expression with no trailing
   semicolon, so it can be used as an operand or a call argument as well as
   on the right-hand side of an assignment statement. */
#define CHARACTER(ch) \
     ((PyBytesObject *)&(CHARACTERS[ch]))
#define EMPTY (&_Py_SINGLETON(bytes_empty))
44 
45 
46 // Return a borrowed reference to the empty bytes string singleton.
bytes_get_empty(void)47 static inline PyObject* bytes_get_empty(void)
48 {
49     return &EMPTY->ob_base.ob_base;
50 }
51 
52 
53 // Return a strong reference to the empty bytes string singleton.
bytes_new_empty(void)54 static inline PyObject* bytes_new_empty(void)
55 {
56     Py_INCREF(EMPTY);
57     return (PyObject *)EMPTY;
58 }
59 
60 
61 /*
62    For PyBytes_FromString(), the parameter `str' points to a null-terminated
63    string containing exactly `size' bytes.
64 
65    For PyBytes_FromStringAndSize(), the parameter `str' is
66    either NULL or else points to a string containing at least `size' bytes.
67    For PyBytes_FromStringAndSize(), the string in the `str' parameter does
68    not have to be null-terminated.  (Therefore it is safe to construct a
69    substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
70    If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
71    bytes (setting the last byte to the null terminating character) and you can
72    fill in the data yourself.  If `str' is non-NULL then the resulting
73    PyBytes object must be treated as immutable and you must not fill in nor
74    alter the data yourself, since the strings may be shared.
75 
76    The PyObject member `op->ob_size', which denotes the number of "extra
77    items" in a variable-size object, will contain the number of bytes
78    allocated for string data, not counting the null terminating character.
79    It is therefore equal to the `size' parameter (for
80    PyBytes_FromStringAndSize()) or the length of the string in the `str'
81    parameter (for PyBytes_FromString()).
82 */
83 static PyObject *
_PyBytes_FromSize(Py_ssize_t size,int use_calloc)84 _PyBytes_FromSize(Py_ssize_t size, int use_calloc)
85 {
86     PyBytesObject *op;
87     assert(size >= 0);
88 
89     if (size == 0) {
90         return bytes_new_empty();
91     }
92 
93     if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
94         PyErr_SetString(PyExc_OverflowError,
95                         "byte string is too large");
96         return NULL;
97     }
98 
99     /* Inline PyObject_NewVar */
100     if (use_calloc)
101         op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
102     else
103         op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
104     if (op == NULL) {
105         return PyErr_NoMemory();
106     }
107     _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
108 _Py_COMP_DIAG_PUSH
109 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
110     op->ob_shash = -1;
111 _Py_COMP_DIAG_POP
112     if (!use_calloc) {
113         op->ob_sval[size] = '\0';
114     }
115     return (PyObject *) op;
116 }
117 
118 PyObject *
PyBytes_FromStringAndSize(const char * str,Py_ssize_t size)119 PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
120 {
121     PyBytesObject *op;
122     if (size < 0) {
123         PyErr_SetString(PyExc_SystemError,
124             "Negative size passed to PyBytes_FromStringAndSize");
125         return NULL;
126     }
127     if (size == 1 && str != NULL) {
128         op = CHARACTER(*str & 255);
129         Py_INCREF(op);
130         return (PyObject *)op;
131     }
132     if (size == 0) {
133         return bytes_new_empty();
134     }
135 
136     op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
137     if (op == NULL)
138         return NULL;
139     if (str == NULL)
140         return (PyObject *) op;
141 
142     memcpy(op->ob_sval, str, size);
143     return (PyObject *) op;
144 }
145 
146 PyObject *
PyBytes_FromString(const char * str)147 PyBytes_FromString(const char *str)
148 {
149     size_t size;
150     PyBytesObject *op;
151 
152     assert(str != NULL);
153     size = strlen(str);
154     if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
155         PyErr_SetString(PyExc_OverflowError,
156             "byte string is too long");
157         return NULL;
158     }
159 
160     if (size == 0) {
161         return bytes_new_empty();
162     }
163     else if (size == 1) {
164         op = CHARACTER(*str & 255);
165         Py_INCREF(op);
166         return (PyObject *)op;
167     }
168 
169     /* Inline PyObject_NewVar */
170     op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
171     if (op == NULL) {
172         return PyErr_NoMemory();
173     }
174     _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
175 _Py_COMP_DIAG_PUSH
176 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
177     op->ob_shash = -1;
178 _Py_COMP_DIAG_POP
179     memcpy(op->ob_sval, str, size+1);
180     return (PyObject *) op;
181 }
182 
183 PyObject *
PyBytes_FromFormatV(const char * format,va_list vargs)184 PyBytes_FromFormatV(const char *format, va_list vargs)
185 {
186     char *s;
187     const char *f;
188     const char *p;
189     Py_ssize_t prec;
190     int longflag;
191     int size_tflag;
192     /* Longest 64-bit formatted numbers:
193        - "18446744073709551615\0" (21 bytes)
194        - "-9223372036854775808\0" (21 bytes)
195        Decimal takes the most space (it isn't enough for octal.)
196 
197        Longest 64-bit pointer representation:
198        "0xffffffffffffffff\0" (19 bytes). */
199     char buffer[21];
200     _PyBytesWriter writer;
201 
202     _PyBytesWriter_Init(&writer);
203 
204     s = _PyBytesWriter_Alloc(&writer, strlen(format));
205     if (s == NULL)
206         return NULL;
207     writer.overallocate = 1;
208 
209 #define WRITE_BYTES(str) \
210     do { \
211         s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
212         if (s == NULL) \
213             goto error; \
214     } while (0)
215 
216     for (f = format; *f; f++) {
217         if (*f != '%') {
218             *s++ = *f;
219             continue;
220         }
221 
222         p = f++;
223 
224         /* ignore the width (ex: 10 in "%10s") */
225         while (Py_ISDIGIT(*f))
226             f++;
227 
228         /* parse the precision (ex: 10 in "%.10s") */
229         prec = 0;
230         if (*f == '.') {
231             f++;
232             for (; Py_ISDIGIT(*f); f++) {
233                 prec = (prec * 10) + (*f - '0');
234             }
235         }
236 
237         while (*f && *f != '%' && !Py_ISALPHA(*f))
238             f++;
239 
240         /* handle the long flag ('l'), but only for %ld and %lu.
241            others can be added when necessary. */
242         longflag = 0;
243         if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
244             longflag = 1;
245             ++f;
246         }
247 
248         /* handle the size_t flag ('z'). */
249         size_tflag = 0;
250         if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
251             size_tflag = 1;
252             ++f;
253         }
254 
255         /* subtract bytes preallocated for the format string
256            (ex: 2 for "%s") */
257         writer.min_size -= (f - p + 1);
258 
259         switch (*f) {
260         case 'c':
261         {
262             int c = va_arg(vargs, int);
263             if (c < 0 || c > 255) {
264                 PyErr_SetString(PyExc_OverflowError,
265                                 "PyBytes_FromFormatV(): %c format "
266                                 "expects an integer in range [0; 255]");
267                 goto error;
268             }
269             writer.min_size++;
270             *s++ = (unsigned char)c;
271             break;
272         }
273 
274         case 'd':
275             if (longflag) {
276                 sprintf(buffer, "%ld", va_arg(vargs, long));
277             }
278             else if (size_tflag) {
279                 sprintf(buffer, "%zd", va_arg(vargs, Py_ssize_t));
280             }
281             else {
282                 sprintf(buffer, "%d", va_arg(vargs, int));
283             }
284             assert(strlen(buffer) < sizeof(buffer));
285             WRITE_BYTES(buffer);
286             break;
287 
288         case 'u':
289             if (longflag) {
290                 sprintf(buffer, "%lu", va_arg(vargs, unsigned long));
291             }
292             else if (size_tflag) {
293                 sprintf(buffer, "%zu", va_arg(vargs, size_t));
294             }
295             else {
296                 sprintf(buffer, "%u", va_arg(vargs, unsigned int));
297             }
298             assert(strlen(buffer) < sizeof(buffer));
299             WRITE_BYTES(buffer);
300             break;
301 
302         case 'i':
303             sprintf(buffer, "%i", va_arg(vargs, int));
304             assert(strlen(buffer) < sizeof(buffer));
305             WRITE_BYTES(buffer);
306             break;
307 
308         case 'x':
309             sprintf(buffer, "%x", va_arg(vargs, int));
310             assert(strlen(buffer) < sizeof(buffer));
311             WRITE_BYTES(buffer);
312             break;
313 
314         case 's':
315         {
316             Py_ssize_t i;
317 
318             p = va_arg(vargs, const char*);
319             if (prec <= 0) {
320                 i = strlen(p);
321             }
322             else {
323                 i = 0;
324                 while (i < prec && p[i]) {
325                     i++;
326                 }
327             }
328             s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
329             if (s == NULL)
330                 goto error;
331             break;
332         }
333 
334         case 'p':
335             sprintf(buffer, "%p", va_arg(vargs, void*));
336             assert(strlen(buffer) < sizeof(buffer));
337             /* %p is ill-defined:  ensure leading 0x. */
338             if (buffer[1] == 'X')
339                 buffer[1] = 'x';
340             else if (buffer[1] != 'x') {
341                 memmove(buffer+2, buffer, strlen(buffer)+1);
342                 buffer[0] = '0';
343                 buffer[1] = 'x';
344             }
345             WRITE_BYTES(buffer);
346             break;
347 
348         case '%':
349             writer.min_size++;
350             *s++ = '%';
351             break;
352 
353         default:
354             if (*f == 0) {
355                 /* fix min_size if we reached the end of the format string */
356                 writer.min_size++;
357             }
358 
359             /* invalid format string: copy unformatted string and exit */
360             WRITE_BYTES(p);
361             return _PyBytesWriter_Finish(&writer, s);
362         }
363     }
364 
365 #undef WRITE_BYTES
366 
367     return _PyBytesWriter_Finish(&writer, s);
368 
369  error:
370     _PyBytesWriter_Dealloc(&writer);
371     return NULL;
372 }
373 
374 PyObject *
PyBytes_FromFormat(const char * format,...)375 PyBytes_FromFormat(const char *format, ...)
376 {
377     PyObject* ret;
378     va_list vargs;
379 
380 #ifdef HAVE_STDARG_PROTOTYPES
381     va_start(vargs, format);
382 #else
383     va_start(vargs);
384 #endif
385     ret = PyBytes_FromFormatV(format, vargs);
386     va_end(vargs);
387     return ret;
388 }
389 
390 /* Helpers for formatstring */
391 
392 Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject * args,Py_ssize_t arglen,Py_ssize_t * p_argidx)393 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
394 {
395     Py_ssize_t argidx = *p_argidx;
396     if (argidx < arglen) {
397         (*p_argidx)++;
398         if (arglen < 0)
399             return args;
400         else
401             return PyTuple_GetItem(args, argidx);
402     }
403     PyErr_SetString(PyExc_TypeError,
404                     "not enough arguments for format string");
405     return NULL;
406 }
407 
408 /* Returns a new reference to a PyBytes object, or NULL on failure. */
409 
410 static char*
formatfloat(PyObject * v,int flags,int prec,int type,PyObject ** p_result,_PyBytesWriter * writer,char * str)411 formatfloat(PyObject *v, int flags, int prec, int type,
412             PyObject **p_result, _PyBytesWriter *writer, char *str)
413 {
414     char *p;
415     PyObject *result;
416     double x;
417     size_t len;
418     int dtoa_flags = 0;
419 
420     x = PyFloat_AsDouble(v);
421     if (x == -1.0 && PyErr_Occurred()) {
422         PyErr_Format(PyExc_TypeError, "float argument required, "
423                      "not %.200s", Py_TYPE(v)->tp_name);
424         return NULL;
425     }
426 
427     if (prec < 0)
428         prec = 6;
429 
430     if (flags & F_ALT) {
431         dtoa_flags |= Py_DTSF_ALT;
432     }
433     p = PyOS_double_to_string(x, type, prec, dtoa_flags, NULL);
434 
435     if (p == NULL)
436         return NULL;
437 
438     len = strlen(p);
439     if (writer != NULL) {
440         str = _PyBytesWriter_Prepare(writer, str, len);
441         if (str == NULL) {
442             PyMem_Free(p);
443             return NULL;
444         }
445         memcpy(str, p, len);
446         PyMem_Free(p);
447         str += len;
448         return str;
449     }
450 
451     result = PyBytes_FromStringAndSize(p, len);
452     PyMem_Free(p);
453     *p_result = result;
454     return result != NULL ? str : NULL;
455 }
456 
457 static PyObject *
formatlong(PyObject * v,int flags,int prec,int type)458 formatlong(PyObject *v, int flags, int prec, int type)
459 {
460     PyObject *result, *iobj;
461     if (type == 'i')
462         type = 'd';
463     if (PyLong_Check(v))
464         return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
465     if (PyNumber_Check(v)) {
466         /* make sure number is a type of integer for o, x, and X */
467         if (type == 'o' || type == 'x' || type == 'X')
468             iobj = _PyNumber_Index(v);
469         else
470             iobj = PyNumber_Long(v);
471         if (iobj != NULL) {
472             assert(PyLong_Check(iobj));
473             result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
474             Py_DECREF(iobj);
475             return result;
476         }
477         if (!PyErr_ExceptionMatches(PyExc_TypeError))
478             return NULL;
479     }
480     PyErr_Format(PyExc_TypeError,
481         "%%%c format: %s is required, not %.200s", type,
482         (type == 'o' || type == 'x' || type == 'X') ? "an integer"
483                                                     : "a real number",
484         Py_TYPE(v)->tp_name);
485     return NULL;
486 }
487 
488 static int
byte_converter(PyObject * arg,char * p)489 byte_converter(PyObject *arg, char *p)
490 {
491     if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
492         *p = PyBytes_AS_STRING(arg)[0];
493         return 1;
494     }
495     else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
496         *p = PyByteArray_AS_STRING(arg)[0];
497         return 1;
498     }
499     else {
500         int overflow;
501         long ival = PyLong_AsLongAndOverflow(arg, &overflow);
502         if (ival == -1 && PyErr_Occurred()) {
503             if (PyErr_ExceptionMatches(PyExc_TypeError)) {
504                 goto onError;
505             }
506             return 0;
507         }
508         if (!(0 <= ival && ival <= 255)) {
509             /* this includes an overflow in converting to C long */
510             PyErr_SetString(PyExc_OverflowError,
511                             "%c arg not in range(256)");
512             return 0;
513         }
514         *p = (char)ival;
515         return 1;
516     }
517   onError:
518     PyErr_SetString(PyExc_TypeError,
519         "%c requires an integer in range(256) or a single byte");
520     return 0;
521 }
522 
523 static PyObject *_PyBytes_FromBuffer(PyObject *x);
524 
525 static PyObject *
format_obj(PyObject * v,const char ** pbuf,Py_ssize_t * plen)526 format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
527 {
528     PyObject *func, *result;
529     /* is it a bytes object? */
530     if (PyBytes_Check(v)) {
531         *pbuf = PyBytes_AS_STRING(v);
532         *plen = PyBytes_GET_SIZE(v);
533         Py_INCREF(v);
534         return v;
535     }
536     if (PyByteArray_Check(v)) {
537         *pbuf = PyByteArray_AS_STRING(v);
538         *plen = PyByteArray_GET_SIZE(v);
539         Py_INCREF(v);
540         return v;
541     }
542     /* does it support __bytes__? */
543     func = _PyObject_LookupSpecial(v, &_Py_ID(__bytes__));
544     if (func != NULL) {
545         result = _PyObject_CallNoArgs(func);
546         Py_DECREF(func);
547         if (result == NULL)
548             return NULL;
549         if (!PyBytes_Check(result)) {
550             PyErr_Format(PyExc_TypeError,
551                          "__bytes__ returned non-bytes (type %.200s)",
552                          Py_TYPE(result)->tp_name);
553             Py_DECREF(result);
554             return NULL;
555         }
556         *pbuf = PyBytes_AS_STRING(result);
557         *plen = PyBytes_GET_SIZE(result);
558         return result;
559     }
560     /* does it support buffer protocol? */
561     if (PyObject_CheckBuffer(v)) {
562         /* maybe we can avoid making a copy of the buffer object here? */
563         result = _PyBytes_FromBuffer(v);
564         if (result == NULL)
565             return NULL;
566         *pbuf = PyBytes_AS_STRING(result);
567         *plen = PyBytes_GET_SIZE(result);
568         return result;
569     }
570     PyErr_Format(PyExc_TypeError,
571                  "%%b requires a bytes-like object, "
572                  "or an object that implements __bytes__, not '%.100s'",
573                  Py_TYPE(v)->tp_name);
574     return NULL;
575 }
576 
577 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
578 
579 PyObject *
_PyBytes_FormatEx(const char * format,Py_ssize_t format_len,PyObject * args,int use_bytearray)580 _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
581                   PyObject *args, int use_bytearray)
582 {
583     const char *fmt;
584     char *res;
585     Py_ssize_t arglen, argidx;
586     Py_ssize_t fmtcnt;
587     int args_owned = 0;
588     PyObject *dict = NULL;
589     _PyBytesWriter writer;
590 
591     if (args == NULL) {
592         PyErr_BadInternalCall();
593         return NULL;
594     }
595     fmt = format;
596     fmtcnt = format_len;
597 
598     _PyBytesWriter_Init(&writer);
599     writer.use_bytearray = use_bytearray;
600 
601     res = _PyBytesWriter_Alloc(&writer, fmtcnt);
602     if (res == NULL)
603         return NULL;
604     if (!use_bytearray)
605         writer.overallocate = 1;
606 
607     if (PyTuple_Check(args)) {
608         arglen = PyTuple_GET_SIZE(args);
609         argidx = 0;
610     }
611     else {
612         arglen = -1;
613         argidx = -2;
614     }
615     if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
616         !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
617         !PyByteArray_Check(args)) {
618             dict = args;
619     }
620 
621     while (--fmtcnt >= 0) {
622         if (*fmt != '%') {
623             Py_ssize_t len;
624             char *pos;
625 
626             pos = (char *)memchr(fmt + 1, '%', fmtcnt);
627             if (pos != NULL)
628                 len = pos - fmt;
629             else
630                 len = fmtcnt + 1;
631             assert(len != 0);
632 
633             memcpy(res, fmt, len);
634             res += len;
635             fmt += len;
636             fmtcnt -= (len - 1);
637         }
638         else {
639             /* Got a format specifier */
640             int flags = 0;
641             Py_ssize_t width = -1;
642             int prec = -1;
643             int c = '\0';
644             int fill;
645             PyObject *v = NULL;
646             PyObject *temp = NULL;
647             const char *pbuf = NULL;
648             int sign;
649             Py_ssize_t len = 0;
650             char onechar; /* For byte_converter() */
651             Py_ssize_t alloc;
652 
653             fmt++;
654             if (*fmt == '%') {
655                 *res++ = '%';
656                 fmt++;
657                 fmtcnt--;
658                 continue;
659             }
660             if (*fmt == '(') {
661                 const char *keystart;
662                 Py_ssize_t keylen;
663                 PyObject *key;
664                 int pcount = 1;
665 
666                 if (dict == NULL) {
667                     PyErr_SetString(PyExc_TypeError,
668                              "format requires a mapping");
669                     goto error;
670                 }
671                 ++fmt;
672                 --fmtcnt;
673                 keystart = fmt;
674                 /* Skip over balanced parentheses */
675                 while (pcount > 0 && --fmtcnt >= 0) {
676                     if (*fmt == ')')
677                         --pcount;
678                     else if (*fmt == '(')
679                         ++pcount;
680                     fmt++;
681                 }
682                 keylen = fmt - keystart - 1;
683                 if (fmtcnt < 0 || pcount > 0) {
684                     PyErr_SetString(PyExc_ValueError,
685                                "incomplete format key");
686                     goto error;
687                 }
688                 key = PyBytes_FromStringAndSize(keystart,
689                                                  keylen);
690                 if (key == NULL)
691                     goto error;
692                 if (args_owned) {
693                     Py_DECREF(args);
694                     args_owned = 0;
695                 }
696                 args = PyObject_GetItem(dict, key);
697                 Py_DECREF(key);
698                 if (args == NULL) {
699                     goto error;
700                 }
701                 args_owned = 1;
702                 arglen = -1;
703                 argidx = -2;
704             }
705 
706             /* Parse flags. Example: "%+i" => flags=F_SIGN. */
707             while (--fmtcnt >= 0) {
708                 switch (c = *fmt++) {
709                 case '-': flags |= F_LJUST; continue;
710                 case '+': flags |= F_SIGN; continue;
711                 case ' ': flags |= F_BLANK; continue;
712                 case '#': flags |= F_ALT; continue;
713                 case '0': flags |= F_ZERO; continue;
714                 }
715                 break;
716             }
717 
718             /* Parse width. Example: "%10s" => width=10 */
719             if (c == '*') {
720                 v = getnextarg(args, arglen, &argidx);
721                 if (v == NULL)
722                     goto error;
723                 if (!PyLong_Check(v)) {
724                     PyErr_SetString(PyExc_TypeError,
725                                     "* wants int");
726                     goto error;
727                 }
728                 width = PyLong_AsSsize_t(v);
729                 if (width == -1 && PyErr_Occurred())
730                     goto error;
731                 if (width < 0) {
732                     flags |= F_LJUST;
733                     width = -width;
734                 }
735                 if (--fmtcnt >= 0)
736                     c = *fmt++;
737             }
738             else if (c >= 0 && isdigit(c)) {
739                 width = c - '0';
740                 while (--fmtcnt >= 0) {
741                     c = Py_CHARMASK(*fmt++);
742                     if (!isdigit(c))
743                         break;
744                     if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
745                         PyErr_SetString(
746                             PyExc_ValueError,
747                             "width too big");
748                         goto error;
749                     }
750                     width = width*10 + (c - '0');
751                 }
752             }
753 
754             /* Parse precision. Example: "%.3f" => prec=3 */
755             if (c == '.') {
756                 prec = 0;
757                 if (--fmtcnt >= 0)
758                     c = *fmt++;
759                 if (c == '*') {
760                     v = getnextarg(args, arglen, &argidx);
761                     if (v == NULL)
762                         goto error;
763                     if (!PyLong_Check(v)) {
764                         PyErr_SetString(
765                             PyExc_TypeError,
766                             "* wants int");
767                         goto error;
768                     }
769                     prec = _PyLong_AsInt(v);
770                     if (prec == -1 && PyErr_Occurred())
771                         goto error;
772                     if (prec < 0)
773                         prec = 0;
774                     if (--fmtcnt >= 0)
775                         c = *fmt++;
776                 }
777                 else if (c >= 0 && isdigit(c)) {
778                     prec = c - '0';
779                     while (--fmtcnt >= 0) {
780                         c = Py_CHARMASK(*fmt++);
781                         if (!isdigit(c))
782                             break;
783                         if (prec > (INT_MAX - ((int)c - '0')) / 10) {
784                             PyErr_SetString(
785                                 PyExc_ValueError,
786                                 "prec too big");
787                             goto error;
788                         }
789                         prec = prec*10 + (c - '0');
790                     }
791                 }
792             } /* prec */
793             if (fmtcnt >= 0) {
794                 if (c == 'h' || c == 'l' || c == 'L') {
795                     if (--fmtcnt >= 0)
796                         c = *fmt++;
797                 }
798             }
799             if (fmtcnt < 0) {
800                 PyErr_SetString(PyExc_ValueError,
801                                 "incomplete format");
802                 goto error;
803             }
804             v = getnextarg(args, arglen, &argidx);
805             if (v == NULL)
806                 goto error;
807 
808             if (fmtcnt == 0) {
809                 /* last write: disable writer overallocation */
810                 writer.overallocate = 0;
811             }
812 
813             sign = 0;
814             fill = ' ';
815             switch (c) {
816             case 'r':
817                 // %r is only for 2/3 code; 3 only code should use %a
818             case 'a':
819                 temp = PyObject_ASCII(v);
820                 if (temp == NULL)
821                     goto error;
822                 assert(PyUnicode_IS_ASCII(temp));
823                 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
824                 len = PyUnicode_GET_LENGTH(temp);
825                 if (prec >= 0 && len > prec)
826                     len = prec;
827                 break;
828 
829             case 's':
830                 // %s is only for 2/3 code; 3 only code should use %b
831             case 'b':
832                 temp = format_obj(v, &pbuf, &len);
833                 if (temp == NULL)
834                     goto error;
835                 if (prec >= 0 && len > prec)
836                     len = prec;
837                 break;
838 
839             case 'i':
840             case 'd':
841             case 'u':
842             case 'o':
843             case 'x':
844             case 'X':
845                 if (PyLong_CheckExact(v)
846                     && width == -1 && prec == -1
847                     && !(flags & (F_SIGN | F_BLANK))
848                     && c != 'X')
849                 {
850                     /* Fast path */
851                     int alternate = flags & F_ALT;
852                     int base;
853 
854                     switch(c)
855                     {
856                         default:
857                             Py_UNREACHABLE();
858                         case 'd':
859                         case 'i':
860                         case 'u':
861                             base = 10;
862                             break;
863                         case 'o':
864                             base = 8;
865                             break;
866                         case 'x':
867                         case 'X':
868                             base = 16;
869                             break;
870                     }
871 
872                     /* Fast path */
873                     writer.min_size -= 2; /* size preallocated for "%d" */
874                     res = _PyLong_FormatBytesWriter(&writer, res,
875                                                     v, base, alternate);
876                     if (res == NULL)
877                         goto error;
878                     continue;
879                 }
880 
881                 temp = formatlong(v, flags, prec, c);
882                 if (!temp)
883                     goto error;
884                 assert(PyUnicode_IS_ASCII(temp));
885                 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
886                 len = PyUnicode_GET_LENGTH(temp);
887                 sign = 1;
888                 if (flags & F_ZERO)
889                     fill = '0';
890                 break;
891 
892             case 'e':
893             case 'E':
894             case 'f':
895             case 'F':
896             case 'g':
897             case 'G':
898                 if (width == -1 && prec == -1
899                     && !(flags & (F_SIGN | F_BLANK)))
900                 {
901                     /* Fast path */
902                     writer.min_size -= 2; /* size preallocated for "%f" */
903                     res = formatfloat(v, flags, prec, c, NULL, &writer, res);
904                     if (res == NULL)
905                         goto error;
906                     continue;
907                 }
908 
909                 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
910                     goto error;
911                 pbuf = PyBytes_AS_STRING(temp);
912                 len = PyBytes_GET_SIZE(temp);
913                 sign = 1;
914                 if (flags & F_ZERO)
915                     fill = '0';
916                 break;
917 
918             case 'c':
919                 pbuf = &onechar;
920                 len = byte_converter(v, &onechar);
921                 if (!len)
922                     goto error;
923                 if (width == -1) {
924                     /* Fast path */
925                     *res++ = onechar;
926                     continue;
927                 }
928                 break;
929 
930             default:
931                 PyErr_Format(PyExc_ValueError,
932                   "unsupported format character '%c' (0x%x) "
933                   "at index %zd",
934                   c, c,
935                   (Py_ssize_t)(fmt - 1 - format));
936                 goto error;
937             }
938 
939             if (sign) {
940                 if (*pbuf == '-' || *pbuf == '+') {
941                     sign = *pbuf++;
942                     len--;
943                 }
944                 else if (flags & F_SIGN)
945                     sign = '+';
946                 else if (flags & F_BLANK)
947                     sign = ' ';
948                 else
949                     sign = 0;
950             }
951             if (width < len)
952                 width = len;
953 
954             alloc = width;
955             if (sign != 0 && len == width)
956                 alloc++;
957             /* 2: size preallocated for %s */
958             if (alloc > 2) {
959                 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
960                 if (res == NULL)
961                     goto error;
962             }
963 #ifndef NDEBUG
964             char *before = res;
965 #endif
966 
967             /* Write the sign if needed */
968             if (sign) {
969                 if (fill != ' ')
970                     *res++ = sign;
971                 if (width > len)
972                     width--;
973             }
974 
975             /* Write the numeric prefix for "x", "X" and "o" formats
976                if the alternate form is used.
977                For example, write "0x" for the "%#x" format. */
978             if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
979                 assert(pbuf[0] == '0');
980                 assert(pbuf[1] == c);
981                 if (fill != ' ') {
982                     *res++ = *pbuf++;
983                     *res++ = *pbuf++;
984                 }
985                 width -= 2;
986                 if (width < 0)
987                     width = 0;
988                 len -= 2;
989             }
990 
991             /* Pad left with the fill character if needed */
992             if (width > len && !(flags & F_LJUST)) {
993                 memset(res, fill, width - len);
994                 res += (width - len);
995                 width = len;
996             }
997 
998             /* If padding with spaces: write sign if needed and/or numeric
999                prefix if the alternate form is used */
1000             if (fill == ' ') {
1001                 if (sign)
1002                     *res++ = sign;
1003                 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1004                     assert(pbuf[0] == '0');
1005                     assert(pbuf[1] == c);
1006                     *res++ = *pbuf++;
1007                     *res++ = *pbuf++;
1008                 }
1009             }
1010 
1011             /* Copy bytes */
1012             memcpy(res, pbuf, len);
1013             res += len;
1014 
1015             /* Pad right with the fill character if needed */
1016             if (width > len) {
1017                 memset(res, ' ', width - len);
1018                 res += (width - len);
1019             }
1020 
1021             if (dict && (argidx < arglen)) {
1022                 PyErr_SetString(PyExc_TypeError,
1023                            "not all arguments converted during bytes formatting");
1024                 Py_XDECREF(temp);
1025                 goto error;
1026             }
1027             Py_XDECREF(temp);
1028 
1029 #ifndef NDEBUG
1030             /* check that we computed the exact size for this write */
1031             assert((res - before) == alloc);
1032 #endif
1033         } /* '%' */
1034 
1035         /* If overallocation was disabled, ensure that it was the last
1036            write. Otherwise, we missed an optimization */
1037         assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
1038     } /* until end */
1039 
1040     if (argidx < arglen && !dict) {
1041         PyErr_SetString(PyExc_TypeError,
1042                         "not all arguments converted during bytes formatting");
1043         goto error;
1044     }
1045 
1046     if (args_owned) {
1047         Py_DECREF(args);
1048     }
1049     return _PyBytesWriter_Finish(&writer, res);
1050 
1051  error:
1052     _PyBytesWriter_Dealloc(&writer);
1053     if (args_owned) {
1054         Py_DECREF(args);
1055     }
1056     return NULL;
1057 }
1058 
1059 /* Unescape a backslash-escaped string. */
_PyBytes_DecodeEscape(const char * s,Py_ssize_t len,const char * errors,const char ** first_invalid_escape)1060 PyObject *_PyBytes_DecodeEscape(const char *s,
1061                                 Py_ssize_t len,
1062                                 const char *errors,
1063                                 const char **first_invalid_escape)
1064 {
1065     int c;
1066     char *p;
1067     const char *end;
1068     _PyBytesWriter writer;
1069 
1070     _PyBytesWriter_Init(&writer);
1071 
1072     p = _PyBytesWriter_Alloc(&writer, len);
1073     if (p == NULL)
1074         return NULL;
1075     writer.overallocate = 1;
1076 
1077     *first_invalid_escape = NULL;
1078 
1079     end = s + len;
1080     while (s < end) {
1081         if (*s != '\\') {
1082             *p++ = *s++;
1083             continue;
1084         }
1085 
1086         s++;
1087         if (s == end) {
1088             PyErr_SetString(PyExc_ValueError,
1089                             "Trailing \\ in string");
1090             goto failed;
1091         }
1092 
1093         switch (*s++) {
1094         /* XXX This assumes ASCII! */
1095         case '\n': break;
1096         case '\\': *p++ = '\\'; break;
1097         case '\'': *p++ = '\''; break;
1098         case '\"': *p++ = '\"'; break;
1099         case 'b': *p++ = '\b'; break;
1100         case 'f': *p++ = '\014'; break; /* FF */
1101         case 't': *p++ = '\t'; break;
1102         case 'n': *p++ = '\n'; break;
1103         case 'r': *p++ = '\r'; break;
1104         case 'v': *p++ = '\013'; break; /* VT */
1105         case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1106         case '0': case '1': case '2': case '3':
1107         case '4': case '5': case '6': case '7':
1108             c = s[-1] - '0';
1109             if (s < end && '0' <= *s && *s <= '7') {
1110                 c = (c<<3) + *s++ - '0';
1111                 if (s < end && '0' <= *s && *s <= '7')
1112                     c = (c<<3) + *s++ - '0';
1113             }
1114             if (c > 0377) {
1115                 if (*first_invalid_escape == NULL) {
1116                     *first_invalid_escape = s-3; /* Back up 3 chars, since we've
1117                                                     already incremented s. */
1118                 }
1119             }
1120             *p++ = c;
1121             break;
1122         case 'x':
1123             if (s+1 < end) {
1124                 int digit1, digit2;
1125                 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1126                 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1127                 if (digit1 < 16 && digit2 < 16) {
1128                     *p++ = (unsigned char)((digit1 << 4) + digit2);
1129                     s += 2;
1130                     break;
1131                 }
1132             }
1133             /* invalid hexadecimal digits */
1134 
1135             if (!errors || strcmp(errors, "strict") == 0) {
1136                 PyErr_Format(PyExc_ValueError,
1137                              "invalid \\x escape at position %zd",
1138                              s - 2 - (end - len));
1139                 goto failed;
1140             }
1141             if (strcmp(errors, "replace") == 0) {
1142                 *p++ = '?';
1143             } else if (strcmp(errors, "ignore") == 0)
1144                 /* do nothing */;
1145             else {
1146                 PyErr_Format(PyExc_ValueError,
1147                              "decoding error; unknown "
1148                              "error handling code: %.400s",
1149                              errors);
1150                 goto failed;
1151             }
1152             /* skip \x */
1153             if (s < end && Py_ISXDIGIT(s[0]))
1154                 s++; /* and a hexdigit */
1155             break;
1156 
1157         default:
1158             if (*first_invalid_escape == NULL) {
1159                 *first_invalid_escape = s-1; /* Back up one char, since we've
1160                                                 already incremented s. */
1161             }
1162             *p++ = '\\';
1163             s--;
1164         }
1165     }
1166 
1167     return _PyBytesWriter_Finish(&writer, p);
1168 
1169   failed:
1170     _PyBytesWriter_Dealloc(&writer);
1171     return NULL;
1172 }
1173 
PyBytes_DecodeEscape(const char * s,Py_ssize_t len,const char * errors,Py_ssize_t Py_UNUSED (unicode),const char * Py_UNUSED (recode_encoding))1174 PyObject *PyBytes_DecodeEscape(const char *s,
1175                                 Py_ssize_t len,
1176                                 const char *errors,
1177                                 Py_ssize_t Py_UNUSED(unicode),
1178                                 const char *Py_UNUSED(recode_encoding))
1179 {
1180     const char* first_invalid_escape;
1181     PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
1182                                              &first_invalid_escape);
1183     if (result == NULL)
1184         return NULL;
1185     if (first_invalid_escape != NULL) {
1186         unsigned char c = *first_invalid_escape;
1187         if ('4' <= c && c <= '7') {
1188             if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1189                                  "invalid octal escape sequence '\\%.3s'",
1190                                  first_invalid_escape) < 0)
1191             {
1192                 Py_DECREF(result);
1193                 return NULL;
1194             }
1195         }
1196         else {
1197             if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1198                                  "invalid escape sequence '\\%c'",
1199                                  c) < 0)
1200             {
1201                 Py_DECREF(result);
1202                 return NULL;
1203             }
1204         }
1205     }
1206     return result;
1207 
1208 }
1209 /* -------------------------------------------------------------------- */
1210 /* object api */
1211 
1212 Py_ssize_t
PyBytes_Size(PyObject * op)1213 PyBytes_Size(PyObject *op)
1214 {
1215     if (!PyBytes_Check(op)) {
1216         PyErr_Format(PyExc_TypeError,
1217              "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1218         return -1;
1219     }
1220     return Py_SIZE(op);
1221 }
1222 
1223 char *
PyBytes_AsString(PyObject * op)1224 PyBytes_AsString(PyObject *op)
1225 {
1226     if (!PyBytes_Check(op)) {
1227         PyErr_Format(PyExc_TypeError,
1228              "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1229         return NULL;
1230     }
1231     return ((PyBytesObject *)op)->ob_sval;
1232 }
1233 
1234 int
PyBytes_AsStringAndSize(PyObject * obj,char ** s,Py_ssize_t * len)1235 PyBytes_AsStringAndSize(PyObject *obj,
1236                          char **s,
1237                          Py_ssize_t *len)
1238 {
1239     if (s == NULL) {
1240         PyErr_BadInternalCall();
1241         return -1;
1242     }
1243 
1244     if (!PyBytes_Check(obj)) {
1245         PyErr_Format(PyExc_TypeError,
1246              "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1247         return -1;
1248     }
1249 
1250     *s = PyBytes_AS_STRING(obj);
1251     if (len != NULL)
1252         *len = PyBytes_GET_SIZE(obj);
1253     else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1254         PyErr_SetString(PyExc_ValueError,
1255                         "embedded null byte");
1256         return -1;
1257     }
1258     return 0;
1259 }
1260 
1261 /* -------------------------------------------------------------------- */
1262 /* Methods */
1263 
1264 #define STRINGLIB_GET_EMPTY() bytes_get_empty()
1265 
1266 #include "stringlib/stringdefs.h"
1267 #define STRINGLIB_MUTABLE 0
1268 
1269 #include "stringlib/fastsearch.h"
1270 #include "stringlib/count.h"
1271 #include "stringlib/find.h"
1272 #include "stringlib/join.h"
1273 #include "stringlib/partition.h"
1274 #include "stringlib/split.h"
1275 #include "stringlib/ctype.h"
1276 
1277 #include "stringlib/transmogrify.h"
1278 
1279 #undef STRINGLIB_GET_EMPTY
1280 
1281 Py_ssize_t
_PyBytes_Find(const char * haystack,Py_ssize_t len_haystack,const char * needle,Py_ssize_t len_needle,Py_ssize_t offset)1282 _PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
1283               const char *needle, Py_ssize_t len_needle,
1284               Py_ssize_t offset)
1285 {
1286     return stringlib_find(haystack, len_haystack,
1287                           needle, len_needle, offset);
1288 }
1289 
1290 Py_ssize_t
_PyBytes_ReverseFind(const char * haystack,Py_ssize_t len_haystack,const char * needle,Py_ssize_t len_needle,Py_ssize_t offset)1291 _PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
1292                      const char *needle, Py_ssize_t len_needle,
1293                      Py_ssize_t offset)
1294 {
1295     return stringlib_rfind(haystack, len_haystack,
1296                            needle, len_needle, offset);
1297 }
1298 
1299 PyObject *
PyBytes_Repr(PyObject * obj,int smartquotes)1300 PyBytes_Repr(PyObject *obj, int smartquotes)
1301 {
1302     PyBytesObject* op = (PyBytesObject*) obj;
1303     Py_ssize_t i, length = Py_SIZE(op);
1304     Py_ssize_t newsize, squotes, dquotes;
1305     PyObject *v;
1306     unsigned char quote;
1307     const unsigned char *s;
1308     Py_UCS1 *p;
1309 
1310     /* Compute size of output string */
1311     squotes = dquotes = 0;
1312     newsize = 3; /* b'' */
1313     s = (const unsigned char*)op->ob_sval;
1314     for (i = 0; i < length; i++) {
1315         Py_ssize_t incr = 1;
1316         switch(s[i]) {
1317         case '\'': squotes++; break;
1318         case '"':  dquotes++; break;
1319         case '\\': case '\t': case '\n': case '\r':
1320             incr = 2; break; /* \C */
1321         default:
1322             if (s[i] < ' ' || s[i] >= 0x7f)
1323                 incr = 4; /* \xHH */
1324         }
1325         if (newsize > PY_SSIZE_T_MAX - incr)
1326             goto overflow;
1327         newsize += incr;
1328     }
1329     quote = '\'';
1330     if (smartquotes && squotes && !dquotes)
1331         quote = '"';
1332     if (squotes && quote == '\'') {
1333         if (newsize > PY_SSIZE_T_MAX - squotes)
1334             goto overflow;
1335         newsize += squotes;
1336     }
1337 
1338     v = PyUnicode_New(newsize, 127);
1339     if (v == NULL) {
1340         return NULL;
1341     }
1342     p = PyUnicode_1BYTE_DATA(v);
1343 
1344     *p++ = 'b', *p++ = quote;
1345     for (i = 0; i < length; i++) {
1346         unsigned char c = op->ob_sval[i];
1347         if (c == quote || c == '\\')
1348             *p++ = '\\', *p++ = c;
1349         else if (c == '\t')
1350             *p++ = '\\', *p++ = 't';
1351         else if (c == '\n')
1352             *p++ = '\\', *p++ = 'n';
1353         else if (c == '\r')
1354             *p++ = '\\', *p++ = 'r';
1355         else if (c < ' ' || c >= 0x7f) {
1356             *p++ = '\\';
1357             *p++ = 'x';
1358             *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1359             *p++ = Py_hexdigits[c & 0xf];
1360         }
1361         else
1362             *p++ = c;
1363     }
1364     *p++ = quote;
1365     assert(_PyUnicode_CheckConsistency(v, 1));
1366     return v;
1367 
1368   overflow:
1369     PyErr_SetString(PyExc_OverflowError,
1370                     "bytes object is too large to make repr");
1371     return NULL;
1372 }
1373 
1374 static PyObject *
bytes_repr(PyObject * op)1375 bytes_repr(PyObject *op)
1376 {
1377     return PyBytes_Repr(op, 1);
1378 }
1379 
1380 static PyObject *
bytes_str(PyObject * op)1381 bytes_str(PyObject *op)
1382 {
1383     if (_Py_GetConfig()->bytes_warning) {
1384         if (PyErr_WarnEx(PyExc_BytesWarning,
1385                          "str() on a bytes instance", 1)) {
1386             return NULL;
1387         }
1388     }
1389     return bytes_repr(op);
1390 }
1391 
1392 static Py_ssize_t
bytes_length(PyBytesObject * a)1393 bytes_length(PyBytesObject *a)
1394 {
1395     return Py_SIZE(a);
1396 }
1397 
1398 /* This is also used by PyBytes_Concat() */
1399 static PyObject *
bytes_concat(PyObject * a,PyObject * b)1400 bytes_concat(PyObject *a, PyObject *b)
1401 {
1402     Py_buffer va, vb;
1403     PyObject *result = NULL;
1404 
1405     va.len = -1;
1406     vb.len = -1;
1407     if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1408         PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
1409         PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1410                      Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
1411         goto done;
1412     }
1413 
1414     /* Optimize end cases */
1415     if (va.len == 0 && PyBytes_CheckExact(b)) {
1416         result = b;
1417         Py_INCREF(result);
1418         goto done;
1419     }
1420     if (vb.len == 0 && PyBytes_CheckExact(a)) {
1421         result = a;
1422         Py_INCREF(result);
1423         goto done;
1424     }
1425 
1426     if (va.len > PY_SSIZE_T_MAX - vb.len) {
1427         PyErr_NoMemory();
1428         goto done;
1429     }
1430 
1431     result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
1432     if (result != NULL) {
1433         memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1434         memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1435     }
1436 
1437   done:
1438     if (va.len != -1)
1439         PyBuffer_Release(&va);
1440     if (vb.len != -1)
1441         PyBuffer_Release(&vb);
1442     return result;
1443 }
1444 
1445 static PyObject *
bytes_repeat(PyBytesObject * a,Py_ssize_t n)1446 bytes_repeat(PyBytesObject *a, Py_ssize_t n)
1447 {
1448     Py_ssize_t size;
1449     PyBytesObject *op;
1450     size_t nbytes;
1451     if (n < 0)
1452         n = 0;
1453     /* watch out for overflows:  the size can overflow int,
1454      * and the # of bytes needed can overflow size_t
1455      */
1456     if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1457         PyErr_SetString(PyExc_OverflowError,
1458             "repeated bytes are too long");
1459         return NULL;
1460     }
1461     size = Py_SIZE(a) * n;
1462     if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1463         Py_INCREF(a);
1464         return (PyObject *)a;
1465     }
1466     nbytes = (size_t)size;
1467     if (nbytes + PyBytesObject_SIZE <= nbytes) {
1468         PyErr_SetString(PyExc_OverflowError,
1469             "repeated bytes are too long");
1470         return NULL;
1471     }
1472     op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + nbytes);
1473     if (op == NULL) {
1474         return PyErr_NoMemory();
1475     }
1476     _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
1477 _Py_COMP_DIAG_PUSH
1478 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
1479     op->ob_shash = -1;
1480 _Py_COMP_DIAG_POP
1481     op->ob_sval[size] = '\0';
1482 
1483     _PyBytes_Repeat(op->ob_sval, size, a->ob_sval, Py_SIZE(a));
1484 
1485     return (PyObject *) op;
1486 }
1487 
1488 static int
bytes_contains(PyObject * self,PyObject * arg)1489 bytes_contains(PyObject *self, PyObject *arg)
1490 {
1491     return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1492 }
1493 
1494 static PyObject *
bytes_item(PyBytesObject * a,Py_ssize_t i)1495 bytes_item(PyBytesObject *a, Py_ssize_t i)
1496 {
1497     if (i < 0 || i >= Py_SIZE(a)) {
1498         PyErr_SetString(PyExc_IndexError, "index out of range");
1499         return NULL;
1500     }
1501     return _PyLong_FromUnsignedChar((unsigned char)a->ob_sval[i]);
1502 }
1503 
1504 static int
bytes_compare_eq(PyBytesObject * a,PyBytesObject * b)1505 bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1506 {
1507     int cmp;
1508     Py_ssize_t len;
1509 
1510     len = Py_SIZE(a);
1511     if (Py_SIZE(b) != len)
1512         return 0;
1513 
1514     if (a->ob_sval[0] != b->ob_sval[0])
1515         return 0;
1516 
1517     cmp = memcmp(a->ob_sval, b->ob_sval, len);
1518     return (cmp == 0);
1519 }
1520 
1521 static PyObject*
bytes_richcompare(PyBytesObject * a,PyBytesObject * b,int op)1522 bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
1523 {
1524     int c;
1525     Py_ssize_t len_a, len_b;
1526     Py_ssize_t min_len;
1527 
1528     /* Make sure both arguments are strings. */
1529     if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
1530         if (_Py_GetConfig()->bytes_warning && (op == Py_EQ || op == Py_NE)) {
1531             if (PyUnicode_Check(a) || PyUnicode_Check(b)) {
1532                 if (PyErr_WarnEx(PyExc_BytesWarning,
1533                                  "Comparison between bytes and string", 1))
1534                     return NULL;
1535             }
1536             if (PyLong_Check(a) || PyLong_Check(b)) {
1537                 if (PyErr_WarnEx(PyExc_BytesWarning,
1538                                  "Comparison between bytes and int", 1))
1539                     return NULL;
1540             }
1541         }
1542         Py_RETURN_NOTIMPLEMENTED;
1543     }
1544     else if (a == b) {
1545         switch (op) {
1546         case Py_EQ:
1547         case Py_LE:
1548         case Py_GE:
1549             /* a byte string is equal to itself */
1550             Py_RETURN_TRUE;
1551         case Py_NE:
1552         case Py_LT:
1553         case Py_GT:
1554             Py_RETURN_FALSE;
1555         default:
1556             PyErr_BadArgument();
1557             return NULL;
1558         }
1559     }
1560     else if (op == Py_EQ || op == Py_NE) {
1561         int eq = bytes_compare_eq(a, b);
1562         eq ^= (op == Py_NE);
1563         return PyBool_FromLong(eq);
1564     }
1565     else {
1566         len_a = Py_SIZE(a);
1567         len_b = Py_SIZE(b);
1568         min_len = Py_MIN(len_a, len_b);
1569         if (min_len > 0) {
1570             c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1571             if (c == 0)
1572                 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1573         }
1574         else
1575             c = 0;
1576         if (c != 0)
1577             Py_RETURN_RICHCOMPARE(c, 0, op);
1578         Py_RETURN_RICHCOMPARE(len_a, len_b, op);
1579     }
1580 }
1581 
1582 static Py_hash_t
bytes_hash(PyBytesObject * a)1583 bytes_hash(PyBytesObject *a)
1584 {
1585 _Py_COMP_DIAG_PUSH
1586 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
1587     if (a->ob_shash == -1) {
1588         /* Can't fail */
1589         a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
1590     }
1591     return a->ob_shash;
1592 _Py_COMP_DIAG_POP
1593 }
1594 
1595 static PyObject*
bytes_subscript(PyBytesObject * self,PyObject * item)1596 bytes_subscript(PyBytesObject* self, PyObject* item)
1597 {
1598     if (_PyIndex_Check(item)) {
1599         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1600         if (i == -1 && PyErr_Occurred())
1601             return NULL;
1602         if (i < 0)
1603             i += PyBytes_GET_SIZE(self);
1604         if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1605             PyErr_SetString(PyExc_IndexError,
1606                             "index out of range");
1607             return NULL;
1608         }
1609         return _PyLong_FromUnsignedChar((unsigned char)self->ob_sval[i]);
1610     }
1611     else if (PySlice_Check(item)) {
1612         Py_ssize_t start, stop, step, slicelength, i;
1613         size_t cur;
1614         const char* source_buf;
1615         char* result_buf;
1616         PyObject* result;
1617 
1618         if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1619             return NULL;
1620         }
1621         slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1622                                             &stop, step);
1623 
1624         if (slicelength <= 0) {
1625             return PyBytes_FromStringAndSize("", 0);
1626         }
1627         else if (start == 0 && step == 1 &&
1628                  slicelength == PyBytes_GET_SIZE(self) &&
1629                  PyBytes_CheckExact(self)) {
1630             Py_INCREF(self);
1631             return (PyObject *)self;
1632         }
1633         else if (step == 1) {
1634             return PyBytes_FromStringAndSize(
1635                 PyBytes_AS_STRING(self) + start,
1636                 slicelength);
1637         }
1638         else {
1639             source_buf = PyBytes_AS_STRING(self);
1640             result = PyBytes_FromStringAndSize(NULL, slicelength);
1641             if (result == NULL)
1642                 return NULL;
1643 
1644             result_buf = PyBytes_AS_STRING(result);
1645             for (cur = start, i = 0; i < slicelength;
1646                  cur += step, i++) {
1647                 result_buf[i] = source_buf[cur];
1648             }
1649 
1650             return result;
1651         }
1652     }
1653     else {
1654         PyErr_Format(PyExc_TypeError,
1655                      "byte indices must be integers or slices, not %.200s",
1656                      Py_TYPE(item)->tp_name);
1657         return NULL;
1658     }
1659 }
1660 
1661 static int
bytes_buffer_getbuffer(PyBytesObject * self,Py_buffer * view,int flags)1662 bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
1663 {
1664     return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1665                              1, flags);
1666 }
1667 
1668 static PySequenceMethods bytes_as_sequence = {
1669     (lenfunc)bytes_length, /*sq_length*/
1670     (binaryfunc)bytes_concat, /*sq_concat*/
1671     (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1672     (ssizeargfunc)bytes_item, /*sq_item*/
1673     0,                  /*sq_slice*/
1674     0,                  /*sq_ass_item*/
1675     0,                  /*sq_ass_slice*/
1676     (objobjproc)bytes_contains /*sq_contains*/
1677 };
1678 
1679 static PyMappingMethods bytes_as_mapping = {
1680     (lenfunc)bytes_length,
1681     (binaryfunc)bytes_subscript,
1682     0,
1683 };
1684 
1685 static PyBufferProcs bytes_as_buffer = {
1686     (getbufferproc)bytes_buffer_getbuffer,
1687     NULL,
1688 };
1689 
1690 
1691 /*[clinic input]
1692 bytes.__bytes__
1693 Convert this value to exact type bytes.
1694 [clinic start generated code]*/
1695 
1696 static PyObject *
bytes___bytes___impl(PyBytesObject * self)1697 bytes___bytes___impl(PyBytesObject *self)
1698 /*[clinic end generated code: output=63a306a9bc0caac5 input=34ec5ddba98bd6bb]*/
1699 {
1700     if (PyBytes_CheckExact(self)) {
1701         Py_INCREF(self);
1702         return (PyObject *)self;
1703     }
1704     else {
1705         return PyBytes_FromStringAndSize(self->ob_sval, Py_SIZE(self));
1706     }
1707 }
1708 
1709 
1710 #define LEFTSTRIP 0
1711 #define RIGHTSTRIP 1
1712 #define BOTHSTRIP 2
1713 
1714 /*[clinic input]
1715 bytes.split
1716 
1717     sep: object = None
1718         The delimiter according which to split the bytes.
1719         None (the default value) means split on ASCII whitespace characters
1720         (space, tab, return, newline, formfeed, vertical tab).
1721     maxsplit: Py_ssize_t = -1
1722         Maximum number of splits to do.
1723         -1 (the default value) means no limit.
1724 
1725 Return a list of the sections in the bytes, using sep as the delimiter.
1726 [clinic start generated code]*/
1727 
1728 static PyObject *
bytes_split_impl(PyBytesObject * self,PyObject * sep,Py_ssize_t maxsplit)1729 bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1730 /*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
1731 {
1732     Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1733     const char *s = PyBytes_AS_STRING(self), *sub;
1734     Py_buffer vsub;
1735     PyObject *list;
1736 
1737     if (maxsplit < 0)
1738         maxsplit = PY_SSIZE_T_MAX;
1739     if (sep == Py_None)
1740         return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1741     if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1742         return NULL;
1743     sub = vsub.buf;
1744     n = vsub.len;
1745 
1746     list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1747     PyBuffer_Release(&vsub);
1748     return list;
1749 }
1750 
1751 /*[clinic input]
1752 bytes.partition
1753 
1754     sep: Py_buffer
1755     /
1756 
1757 Partition the bytes into three parts using the given separator.
1758 
1759 This will search for the separator sep in the bytes. If the separator is found,
1760 returns a 3-tuple containing the part before the separator, the separator
1761 itself, and the part after it.
1762 
1763 If the separator is not found, returns a 3-tuple containing the original bytes
1764 object and two empty bytes objects.
1765 [clinic start generated code]*/
1766 
1767 static PyObject *
bytes_partition_impl(PyBytesObject * self,Py_buffer * sep)1768 bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1769 /*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
1770 {
1771     return stringlib_partition(
1772         (PyObject*) self,
1773         PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1774         sep->obj, (const char *)sep->buf, sep->len
1775         );
1776 }
1777 
1778 /*[clinic input]
1779 bytes.rpartition
1780 
1781     sep: Py_buffer
1782     /
1783 
1784 Partition the bytes into three parts using the given separator.
1785 
1786 This will search for the separator sep in the bytes, starting at the end. If
1787 the separator is found, returns a 3-tuple containing the part before the
1788 separator, the separator itself, and the part after it.
1789 
1790 If the separator is not found, returns a 3-tuple containing two empty bytes
1791 objects and the original bytes object.
1792 [clinic start generated code]*/
1793 
1794 static PyObject *
bytes_rpartition_impl(PyBytesObject * self,Py_buffer * sep)1795 bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1796 /*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
1797 {
1798     return stringlib_rpartition(
1799         (PyObject*) self,
1800         PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1801         sep->obj, (const char *)sep->buf, sep->len
1802         );
1803 }
1804 
1805 /*[clinic input]
1806 bytes.rsplit = bytes.split
1807 
1808 Return a list of the sections in the bytes, using sep as the delimiter.
1809 
1810 Splitting is done starting at the end of the bytes and working to the front.
1811 [clinic start generated code]*/
1812 
1813 static PyObject *
bytes_rsplit_impl(PyBytesObject * self,PyObject * sep,Py_ssize_t maxsplit)1814 bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1815 /*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
1816 {
1817     Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1818     const char *s = PyBytes_AS_STRING(self), *sub;
1819     Py_buffer vsub;
1820     PyObject *list;
1821 
1822     if (maxsplit < 0)
1823         maxsplit = PY_SSIZE_T_MAX;
1824     if (sep == Py_None)
1825         return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1826     if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1827         return NULL;
1828     sub = vsub.buf;
1829     n = vsub.len;
1830 
1831     list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1832     PyBuffer_Release(&vsub);
1833     return list;
1834 }
1835 
1836 
1837 /*[clinic input]
1838 bytes.join
1839 
1840     iterable_of_bytes: object
1841     /
1842 
1843 Concatenate any number of bytes objects.
1844 
1845 The bytes whose method is called is inserted in between each pair.
1846 
1847 The result is returned as a new bytes object.
1848 
1849 Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1850 [clinic start generated code]*/
1851 
1852 static PyObject *
bytes_join(PyBytesObject * self,PyObject * iterable_of_bytes)1853 bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1854 /*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
1855 {
1856     return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
1857 }
1858 
1859 PyObject *
_PyBytes_Join(PyObject * sep,PyObject * x)1860 _PyBytes_Join(PyObject *sep, PyObject *x)
1861 {
1862     assert(sep != NULL && PyBytes_Check(sep));
1863     assert(x != NULL);
1864     return bytes_join((PyBytesObject*)sep, x);
1865 }
1866 
1867 static PyObject *
bytes_find(PyBytesObject * self,PyObject * args)1868 bytes_find(PyBytesObject *self, PyObject *args)
1869 {
1870     return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1871 }
1872 
1873 static PyObject *
bytes_index(PyBytesObject * self,PyObject * args)1874 bytes_index(PyBytesObject *self, PyObject *args)
1875 {
1876     return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1877 }
1878 
1879 
1880 static PyObject *
bytes_rfind(PyBytesObject * self,PyObject * args)1881 bytes_rfind(PyBytesObject *self, PyObject *args)
1882 {
1883     return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1884 }
1885 
1886 
1887 static PyObject *
bytes_rindex(PyBytesObject * self,PyObject * args)1888 bytes_rindex(PyBytesObject *self, PyObject *args)
1889 {
1890     return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1891 }
1892 
1893 
1894 Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyBytesObject * self,int striptype,PyObject * sepobj)1895 do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
1896 {
1897     Py_buffer vsep;
1898     const char *s = PyBytes_AS_STRING(self);
1899     Py_ssize_t len = PyBytes_GET_SIZE(self);
1900     char *sep;
1901     Py_ssize_t seplen;
1902     Py_ssize_t i, j;
1903 
1904     if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
1905         return NULL;
1906     sep = vsep.buf;
1907     seplen = vsep.len;
1908 
1909     i = 0;
1910     if (striptype != RIGHTSTRIP) {
1911         while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1912             i++;
1913         }
1914     }
1915 
1916     j = len;
1917     if (striptype != LEFTSTRIP) {
1918         do {
1919             j--;
1920         } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1921         j++;
1922     }
1923 
1924     PyBuffer_Release(&vsep);
1925 
1926     if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1927         Py_INCREF(self);
1928         return (PyObject*)self;
1929     }
1930     else
1931         return PyBytes_FromStringAndSize(s+i, j-i);
1932 }
1933 
1934 
1935 Py_LOCAL_INLINE(PyObject *)
do_strip(PyBytesObject * self,int striptype)1936 do_strip(PyBytesObject *self, int striptype)
1937 {
1938     const char *s = PyBytes_AS_STRING(self);
1939     Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
1940 
1941     i = 0;
1942     if (striptype != RIGHTSTRIP) {
1943         while (i < len && Py_ISSPACE(s[i])) {
1944             i++;
1945         }
1946     }
1947 
1948     j = len;
1949     if (striptype != LEFTSTRIP) {
1950         do {
1951             j--;
1952         } while (j >= i && Py_ISSPACE(s[j]));
1953         j++;
1954     }
1955 
1956     if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1957         Py_INCREF(self);
1958         return (PyObject*)self;
1959     }
1960     else
1961         return PyBytes_FromStringAndSize(s+i, j-i);
1962 }
1963 
1964 
1965 Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyBytesObject * self,int striptype,PyObject * bytes)1966 do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
1967 {
1968     if (bytes != Py_None) {
1969         return do_xstrip(self, striptype, bytes);
1970     }
1971     return do_strip(self, striptype);
1972 }
1973 
1974 /*[clinic input]
1975 bytes.strip
1976 
1977     bytes: object = None
1978     /
1979 
1980 Strip leading and trailing bytes contained in the argument.
1981 
1982 If the argument is omitted or None, strip leading and trailing ASCII whitespace.
1983 [clinic start generated code]*/
1984 
1985 static PyObject *
bytes_strip_impl(PyBytesObject * self,PyObject * bytes)1986 bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
1987 /*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
1988 {
1989     return do_argstrip(self, BOTHSTRIP, bytes);
1990 }
1991 
1992 /*[clinic input]
1993 bytes.lstrip
1994 
1995     bytes: object = None
1996     /
1997 
1998 Strip leading bytes contained in the argument.
1999 
2000 If the argument is omitted or None, strip leading  ASCII whitespace.
2001 [clinic start generated code]*/
2002 
2003 static PyObject *
bytes_lstrip_impl(PyBytesObject * self,PyObject * bytes)2004 bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
2005 /*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
2006 {
2007     return do_argstrip(self, LEFTSTRIP, bytes);
2008 }
2009 
2010 /*[clinic input]
2011 bytes.rstrip
2012 
2013     bytes: object = None
2014     /
2015 
2016 Strip trailing bytes contained in the argument.
2017 
2018 If the argument is omitted or None, strip trailing ASCII whitespace.
2019 [clinic start generated code]*/
2020 
2021 static PyObject *
bytes_rstrip_impl(PyBytesObject * self,PyObject * bytes)2022 bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
2023 /*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
2024 {
2025     return do_argstrip(self, RIGHTSTRIP, bytes);
2026 }
2027 
2028 
2029 static PyObject *
bytes_count(PyBytesObject * self,PyObject * args)2030 bytes_count(PyBytesObject *self, PyObject *args)
2031 {
2032     return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2033 }
2034 
2035 
2036 /*[clinic input]
2037 bytes.translate
2038 
2039     table: object
2040         Translation table, which must be a bytes object of length 256.
2041     /
2042     delete as deletechars: object(c_default="NULL") = b''
2043 
2044 Return a copy with each character mapped by the given translation table.
2045 
2046 All characters occurring in the optional argument delete are removed.
2047 The remaining characters are mapped through the given translation table.
2048 [clinic start generated code]*/
2049 
static PyObject *
bytes_translate_impl(PyBytesObject *self, PyObject *table,
                     PyObject *deletechars)
/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
{
    /* Map every byte of self through a 256-entry table, optionally dropping
       the bytes listed in deletechars.

       table may be a bytes object, None (identity mapping), or any object
       exposing a simple buffer; its length must be 256 or ValueError is
       raised.  deletechars is NULL when the argument was omitted.

       Returns a new reference, or NULL with an exception set.  If no byte
       was changed or removed and self is an exact bytes object, self itself
       is returned. */
    const char *input;
    char *output;
    /* Zero-initialised views: PyBuffer_Release() is called on them on every
       exit path, whether or not they were ever filled in. */
    Py_buffer table_view = {NULL, NULL};
    Py_buffer del_table_view = {NULL, NULL};
    const char *table_chars;
    Py_ssize_t i, c, changed = 0;
    PyObject *input_obj = (PyObject*)self;
    const char *output_start, *del_table_chars=NULL;
    Py_ssize_t inlen, tablen, dellen = 0;
    PyObject *result;
    /* Per-byte mapping used by the general path; -1 marks a deleted byte. */
    int trans_table[256];

    /* Locate the translation table contents. */
    if (PyBytes_Check(table)) {
        table_chars = PyBytes_AS_STRING(table);
        tablen = PyBytes_GET_SIZE(table);
    }
    else if (table == Py_None) {
        /* No table: handled below as the identity mapping. */
        table_chars = NULL;
        tablen = 256;
    }
    else {
        if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
            return NULL;
        table_chars = table_view.buf;
        tablen = table_view.len;
    }

    if (tablen != 256) {
        PyErr_SetString(PyExc_ValueError,
          "translation table must be 256 characters long");
        PyBuffer_Release(&table_view);
        return NULL;
    }

    /* Locate the bytes to delete, if any. */
    if (deletechars != NULL) {
        if (PyBytes_Check(deletechars)) {
            del_table_chars = PyBytes_AS_STRING(deletechars);
            dellen = PyBytes_GET_SIZE(deletechars);
        }
        else {
            if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
                PyBuffer_Release(&table_view);
                return NULL;
            }
            del_table_chars = del_table_view.buf;
            dellen = del_table_view.len;
        }
    }
    else {
        del_table_chars = NULL;
        dellen = 0;
    }

    /* The result can never be longer than the input; allocate for the
       worst case and shrink at the end if bytes were deleted. */
    inlen = PyBytes_GET_SIZE(input_obj);
    result = PyBytes_FromStringAndSize((char *)NULL, inlen);
    if (result == NULL) {
        PyBuffer_Release(&del_table_view);
        PyBuffer_Release(&table_view);
        return NULL;
    }
    output_start = output = PyBytes_AS_STRING(result);
    input = PyBytes_AS_STRING(input_obj);

    if (dellen == 0 && table_chars != NULL) {
        /* If no deletions are required, use faster code */
        for (i = inlen; --i >= 0; ) {
            c = Py_CHARMASK(*input++);
            /* Store the mapped byte and note whether it differs. */
            if (Py_CHARMASK((*output++ = table_chars[c])) != c)
                changed = 1;
        }
        if (!changed && PyBytes_CheckExact(input_obj)) {
            /* Output equals input: hand back self instead of the copy. */
            Py_INCREF(input_obj);
            Py_DECREF(result);
            result = input_obj;
        }
        PyBuffer_Release(&del_table_view);
        PyBuffer_Release(&table_view);
        return result;
    }

    /* General path: build an int table so that -1 can flag deletions. */
    if (table_chars == NULL) {
        for (i = 0; i < 256; i++)
            trans_table[i] = Py_CHARMASK(i);
    } else {
        for (i = 0; i < 256; i++)
            trans_table[i] = Py_CHARMASK(table_chars[i]);
    }
    /* table_chars is not read past this point. */
    PyBuffer_Release(&table_view);

    for (i = 0; i < dellen; i++)
        trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
    /* del_table_chars is not read past this point. */
    PyBuffer_Release(&del_table_view);

    for (i = inlen; --i >= 0; ) {
        c = Py_CHARMASK(*input++);
        /* A deleted byte writes nothing; a byte mapped to itself skips the
           'changed' assignment via continue. */
        if (trans_table[c] != -1)
            if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
                continue;
        changed = 1;
    }
    if (!changed && PyBytes_CheckExact(input_obj)) {
        Py_DECREF(result);
        Py_INCREF(input_obj);
        return input_obj;
    }
    /* Fix the size of the resulting byte string */
    /* NOTE(review): the return value of _PyBytes_Resize() is ignored here;
       presumably it sets result to NULL on failure -- confirm. */
    if (inlen > 0)
        _PyBytes_Resize(&result, output - output_start);
    return result;
}
2165 
2166 
2167 /*[clinic input]
2168 
2169 @staticmethod
2170 bytes.maketrans
2171 
2172     frm: Py_buffer
2173     to: Py_buffer
2174     /
2175 
2176 Return a translation table useable for the bytes or bytearray translate method.
2177 
2178 The returned table will be one where each byte in frm is mapped to the byte at
2179 the same position in to.
2180 
2181 The bytes objects frm and to must be of the same length.
2182 [clinic start generated code]*/
2183 
2184 static PyObject *
bytes_maketrans_impl(Py_buffer * frm,Py_buffer * to)2185 bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2186 /*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
2187 {
2188     return _Py_bytes_maketrans(frm, to);
2189 }
2190 
2191 
2192 /*[clinic input]
2193 bytes.replace
2194 
2195     old: Py_buffer
2196     new: Py_buffer
2197     count: Py_ssize_t = -1
2198         Maximum number of occurrences to replace.
2199         -1 (the default value) means replace all occurrences.
2200     /
2201 
2202 Return a copy with all occurrences of substring old replaced by new.
2203 
2204 If the optional argument count is given, only the first count occurrences are
2205 replaced.
2206 [clinic start generated code]*/
2207 
2208 static PyObject *
bytes_replace_impl(PyBytesObject * self,Py_buffer * old,Py_buffer * new,Py_ssize_t count)2209 bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
2210                    Py_ssize_t count)
2211 /*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
2212 {
2213     return stringlib_replace((PyObject *)self,
2214                              (const char *)old->buf, old->len,
2215                              (const char *)new->buf, new->len, count);
2216 }
2217 
2218 /** End DALKE **/
2219 
2220 /*[clinic input]
2221 bytes.removeprefix as bytes_removeprefix
2222 
2223     prefix: Py_buffer
2224     /
2225 
2226 Return a bytes object with the given prefix string removed if present.
2227 
2228 If the bytes starts with the prefix string, return bytes[len(prefix):].
2229 Otherwise, return a copy of the original bytes.
2230 [clinic start generated code]*/
2231 
2232 static PyObject *
bytes_removeprefix_impl(PyBytesObject * self,Py_buffer * prefix)2233 bytes_removeprefix_impl(PyBytesObject *self, Py_buffer *prefix)
2234 /*[clinic end generated code: output=f006865331a06ab6 input=0c93bac817a8502c]*/
2235 {
2236     const char *self_start = PyBytes_AS_STRING(self);
2237     Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2238     const char *prefix_start = prefix->buf;
2239     Py_ssize_t prefix_len = prefix->len;
2240 
2241     if (self_len >= prefix_len
2242         && prefix_len > 0
2243         && memcmp(self_start, prefix_start, prefix_len) == 0)
2244     {
2245         return PyBytes_FromStringAndSize(self_start + prefix_len,
2246                                          self_len - prefix_len);
2247     }
2248 
2249     if (PyBytes_CheckExact(self)) {
2250         Py_INCREF(self);
2251         return (PyObject *)self;
2252     }
2253 
2254     return PyBytes_FromStringAndSize(self_start, self_len);
2255 }
2256 
2257 /*[clinic input]
2258 bytes.removesuffix as bytes_removesuffix
2259 
2260     suffix: Py_buffer
2261     /
2262 
2263 Return a bytes object with the given suffix string removed if present.
2264 
2265 If the bytes ends with the suffix string and that suffix is not empty,
2266 return bytes[:-len(prefix)].  Otherwise, return a copy of the original
2267 bytes.
2268 [clinic start generated code]*/
2269 
2270 static PyObject *
bytes_removesuffix_impl(PyBytesObject * self,Py_buffer * suffix)2271 bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix)
2272 /*[clinic end generated code: output=d887d308e3242eeb input=9f4e1da8c637bbf1]*/
2273 {
2274     const char *self_start = PyBytes_AS_STRING(self);
2275     Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2276     const char *suffix_start = suffix->buf;
2277     Py_ssize_t suffix_len = suffix->len;
2278 
2279     if (self_len >= suffix_len
2280         && suffix_len > 0
2281         && memcmp(self_start + self_len - suffix_len,
2282                   suffix_start, suffix_len) == 0)
2283     {
2284         return PyBytes_FromStringAndSize(self_start,
2285                                          self_len - suffix_len);
2286     }
2287 
2288     if (PyBytes_CheckExact(self)) {
2289         Py_INCREF(self);
2290         return (PyObject *)self;
2291     }
2292 
2293     return PyBytes_FromStringAndSize(self_start, self_len);
2294 }
2295 
2296 static PyObject *
bytes_startswith(PyBytesObject * self,PyObject * args)2297 bytes_startswith(PyBytesObject *self, PyObject *args)
2298 {
2299     return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2300 }
2301 
2302 static PyObject *
bytes_endswith(PyBytesObject * self,PyObject * args)2303 bytes_endswith(PyBytesObject *self, PyObject *args)
2304 {
2305     return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2306 }
2307 
2308 
2309 /*[clinic input]
2310 bytes.decode
2311 
2312     encoding: str(c_default="NULL") = 'utf-8'
2313         The encoding with which to decode the bytes.
2314     errors: str(c_default="NULL") = 'strict'
2315         The error handling scheme to use for the handling of decoding errors.
2316         The default is 'strict' meaning that decoding errors raise a
2317         UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2318         as well as any other name registered with codecs.register_error that
2319         can handle UnicodeDecodeErrors.
2320 
2321 Decode the bytes using the codec registered for encoding.
2322 [clinic start generated code]*/
2323 
2324 static PyObject *
bytes_decode_impl(PyBytesObject * self,const char * encoding,const char * errors)2325 bytes_decode_impl(PyBytesObject *self, const char *encoding,
2326                   const char *errors)
2327 /*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
2328 {
2329     return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
2330 }
2331 
2332 
2333 /*[clinic input]
2334 bytes.splitlines
2335 
2336     keepends: bool(accept={int}) = False
2337 
2338 Return a list of the lines in the bytes, breaking at line boundaries.
2339 
2340 Line breaks are not included in the resulting list unless keepends is given and
2341 true.
2342 [clinic start generated code]*/
2343 
2344 static PyObject *
bytes_splitlines_impl(PyBytesObject * self,int keepends)2345 bytes_splitlines_impl(PyBytesObject *self, int keepends)
2346 /*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
2347 {
2348     return stringlib_splitlines(
2349         (PyObject*) self, PyBytes_AS_STRING(self),
2350         PyBytes_GET_SIZE(self), keepends
2351         );
2352 }
2353 
2354 /*[clinic input]
2355 @classmethod
2356 bytes.fromhex
2357 
2358     string: unicode
2359     /
2360 
2361 Create a bytes object from a string of hexadecimal numbers.
2362 
2363 Spaces between two numbers are accepted.
2364 Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2365 [clinic start generated code]*/
2366 
2367 static PyObject *
bytes_fromhex_impl(PyTypeObject * type,PyObject * string)2368 bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2369 /*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
2370 {
2371     PyObject *result = _PyBytes_FromHex(string, 0);
2372     if (type != &PyBytes_Type && result != NULL) {
2373         Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
2374     }
2375     return result;
2376 }
2377 
/* Convert a str of hexadecimal digit pairs into a bytes-like object.

   string must be a str object (asserted).  use_bytearray is copied into the
   writer's use_bytearray flag.  Runs of characters matching Py_ISSPACE() are
   skipped where a digit pair would start.

   Returns the object produced by _PyBytesWriter_Finish(), or NULL with an
   exception set; invalid input raises ValueError naming the offending
   position. */
PyObject*
_PyBytes_FromHex(PyObject *string, int use_bytearray)
{
    char *buf;
    Py_ssize_t hexlen, invalid_char;
    unsigned int top, bot;
    const Py_UCS1 *str, *end;
    _PyBytesWriter writer;

    _PyBytesWriter_Init(&writer);
    writer.use_bytearray = use_bytearray;

    assert(PyUnicode_Check(string));
    if (PyUnicode_READY(string))
        return NULL;
    hexlen = PyUnicode_GET_LENGTH(string);

    if (!PyUnicode_IS_ASCII(string)) {
        /* Non-ASCII input can never be valid; only the position to report
           in the error message has to be found. */
        const void *data = PyUnicode_DATA(string);
        unsigned int kind = PyUnicode_KIND(string);
        Py_ssize_t i;

        /* search for the first non-ASCII character */
        for (i = 0; i < hexlen; i++) {
            if (PyUnicode_READ(kind, data, i) >= 128)
                break;
        }
        invalid_char = i;
        goto error;
    }

    assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
    str = PyUnicode_1BYTE_DATA(string);

    /* This overestimates if there are spaces */
    buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
    if (buf == NULL)
        return NULL;

    end = str + hexlen;
    while (str < end) {
        /* skip over spaces in the input */
        /* NOTE(review): the inner loop may read *str at str == end;
           presumably the string data is NUL-terminated there -- confirm. */
        if (Py_ISSPACE(*str)) {
            do {
                str++;
            } while (Py_ISSPACE(*str));
            if (str >= end)
                break;
        }

        /* High nibble: values >= 16 are treated as "not a hex digit". */
        top = _PyLong_DigitValue[*str];
        if (top >= 16) {
            invalid_char = str - PyUnicode_1BYTE_DATA(string);
            goto error;
        }
        str++;

        /* Low nibble; no whitespace is skipped between the two digits. */
        bot = _PyLong_DigitValue[*str];
        if (bot >= 16) {
            invalid_char = str - PyUnicode_1BYTE_DATA(string);
            goto error;
        }
        str++;

        *buf++ = (unsigned char)((top << 4) + bot);
    }

    return _PyBytesWriter_Finish(&writer, buf);

  error:
    /* invalid_char is the index of the offending character in string. */
    PyErr_Format(PyExc_ValueError,
                 "non-hexadecimal number found in "
                 "fromhex() arg at position %zd", invalid_char);
    _PyBytesWriter_Dealloc(&writer);
    return NULL;
}
2454 
2455 /*[clinic input]
2456 bytes.hex
2457 
2458     sep: object = NULL
2459         An optional single character or byte to separate hex bytes.
2460     bytes_per_sep: int = 1
2461         How many bytes between separators.  Positive values count from the
2462         right, negative values count from the left.
2463 
2464 Create a string of hexadecimal numbers from a bytes object.
2465 
2466 Example:
2467 >>> value = b'\xb9\x01\xef'
2468 >>> value.hex()
2469 'b901ef'
2470 >>> value.hex(':')
2471 'b9:01:ef'
2472 >>> value.hex(':', 2)
2473 'b9:01ef'
2474 >>> value.hex(':', -2)
2475 'b901:ef'
2476 [clinic start generated code]*/
2477 
2478 static PyObject *
bytes_hex_impl(PyBytesObject * self,PyObject * sep,int bytes_per_sep)2479 bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2480 /*[clinic end generated code: output=1f134da504064139 input=1a21282b1f1ae595]*/
2481 {
2482     const char *argbuf = PyBytes_AS_STRING(self);
2483     Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2484     return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
2485 }
2486 
2487 static PyObject *
bytes_getnewargs(PyBytesObject * v,PyObject * Py_UNUSED (ignored))2488 bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
2489 {
2490     return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
2491 }
2492 
2493 
/* Method table for the bytes type.

   Entries written as braces are spelled out by hand and point either at the
   bytes_* wrappers in this file or at stringlib_* functions.  The
   BYTES_*_METHODDEF and STRINGLIB_*_METHODDEF names are macros that expand
   to complete entries (presumably supplied by the Argument Clinic generated
   headers -- confirm).  The table ends with a NULL sentinel. */
static PyMethodDef
bytes_methods[] = {
    {"__getnewargs__",          (PyCFunction)bytes_getnewargs,  METH_NOARGS},
    BYTES___BYTES___METHODDEF
    {"capitalize", stringlib_capitalize, METH_NOARGS,
     _Py_capitalize__doc__},
    STRINGLIB_CENTER_METHODDEF
    {"count", (PyCFunction)bytes_count, METH_VARARGS,
     _Py_count__doc__},
    BYTES_DECODE_METHODDEF
    {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
     _Py_endswith__doc__},
    STRINGLIB_EXPANDTABS_METHODDEF
    {"find", (PyCFunction)bytes_find, METH_VARARGS,
     _Py_find__doc__},
    BYTES_FROMHEX_METHODDEF
    BYTES_HEX_METHODDEF
    {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
    {"isalnum", stringlib_isalnum, METH_NOARGS,
     _Py_isalnum__doc__},
    {"isalpha", stringlib_isalpha, METH_NOARGS,
     _Py_isalpha__doc__},
    {"isascii", stringlib_isascii, METH_NOARGS,
     _Py_isascii__doc__},
    {"isdigit", stringlib_isdigit, METH_NOARGS,
     _Py_isdigit__doc__},
    {"islower", stringlib_islower, METH_NOARGS,
     _Py_islower__doc__},
    {"isspace", stringlib_isspace, METH_NOARGS,
     _Py_isspace__doc__},
    {"istitle", stringlib_istitle, METH_NOARGS,
     _Py_istitle__doc__},
    {"isupper", stringlib_isupper, METH_NOARGS,
     _Py_isupper__doc__},
    BYTES_JOIN_METHODDEF
    STRINGLIB_LJUST_METHODDEF
    {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
    BYTES_LSTRIP_METHODDEF
    BYTES_MAKETRANS_METHODDEF
    BYTES_PARTITION_METHODDEF
    BYTES_REPLACE_METHODDEF
    BYTES_REMOVEPREFIX_METHODDEF
    BYTES_REMOVESUFFIX_METHODDEF
    {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
    {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
    STRINGLIB_RJUST_METHODDEF
    BYTES_RPARTITION_METHODDEF
    BYTES_RSPLIT_METHODDEF
    BYTES_RSTRIP_METHODDEF
    BYTES_SPLIT_METHODDEF
    BYTES_SPLITLINES_METHODDEF
    {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
     _Py_startswith__doc__},
    BYTES_STRIP_METHODDEF
    {"swapcase", stringlib_swapcase, METH_NOARGS,
     _Py_swapcase__doc__},
    {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
    BYTES_TRANSLATE_METHODDEF
    {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
    STRINGLIB_ZFILL_METHODDEF
    {NULL,     NULL}                         /* sentinel */
};
2556 
2557 static PyObject *
bytes_mod(PyObject * self,PyObject * arg)2558 bytes_mod(PyObject *self, PyObject *arg)
2559 {
2560     if (!PyBytes_Check(self)) {
2561         Py_RETURN_NOTIMPLEMENTED;
2562     }
2563     return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2564                              arg, 0);
2565 }
2566 
2567 static PyNumberMethods bytes_as_number = {
2568     0,              /*nb_add*/
2569     0,              /*nb_subtract*/
2570     0,              /*nb_multiply*/
2571     bytes_mod,      /*nb_remainder*/
2572 };
2573 
/* Forward declaration: bytes_new_impl() below calls this to wrap an
   already-built bytes object when the requested type is not PyBytes_Type. */
static PyObject *
bytes_subtype_new(PyTypeObject *, PyObject *);
2576 
2577 /*[clinic input]
2578 @classmethod
2579 bytes.__new__ as bytes_new
2580 
2581     source as x: object = NULL
2582     encoding: str = NULL
2583     errors: str = NULL
2584 
2585 [clinic start generated code]*/
2586 
static PyObject *
bytes_new_impl(PyTypeObject *type, PyObject *x, const char *encoding,
               const char *errors)
/*[clinic end generated code: output=1e0c471be311a425 input=f0a966d19b7262b4]*/
{
    /* Constructor for bytes.  The branches below are tried in order:
         - no source: zero-length bytes (encoding/errors are rejected);
         - encoding given: source must be str and is encoded;
         - errors given without encoding: always a TypeError;
         - source has __bytes__: its result is used (must be bytes);
         - str without encoding: TypeError;
         - index-like source: interpreted as a size;
         - anything else: PyBytes_FromObject().
       If type is not PyBytes_Type the result is passed through
       bytes_subtype_new().  Returns NULL with an exception set on error. */
    PyObject *bytes;
    PyObject *func;
    Py_ssize_t size;

    if (x == NULL) {
        if (encoding != NULL || errors != NULL) {
            PyErr_SetString(PyExc_TypeError,
                            encoding != NULL ?
                            "encoding without a string argument" :
                            "errors without a string argument");
            return NULL;
        }
        bytes = PyBytes_FromStringAndSize(NULL, 0);
    }
    else if (encoding != NULL) {
        /* Encode via the codec registry */
        if (!PyUnicode_Check(x)) {
            PyErr_SetString(PyExc_TypeError,
                            "encoding without a string argument");
            return NULL;
        }
        bytes = PyUnicode_AsEncodedString(x, encoding, errors);
    }
    else if (errors != NULL) {
        /* errors alone is never valid; only the message depends on x. */
        PyErr_SetString(PyExc_TypeError,
                        PyUnicode_Check(x) ?
                        "string argument without an encoding" :
                        "errors without a string argument");
        return NULL;
    }
    /* We'd like to call PyObject_Bytes here, but we need to check for an
       integer argument before deferring to PyBytes_FromObject, something
       PyObject_Bytes doesn't do. */
    else if ((func = _PyObject_LookupSpecial(x, &_Py_ID(__bytes__))) != NULL) {
        bytes = _PyObject_CallNoArgs(func);
        Py_DECREF(func);
        if (bytes == NULL)
            return NULL;
        if (!PyBytes_Check(bytes)) {
            PyErr_Format(PyExc_TypeError,
                        "__bytes__ returned non-bytes (type %.200s)",
                        Py_TYPE(bytes)->tp_name);
            Py_DECREF(bytes);
            return NULL;
        }
    }
    /* The lookup returned NULL: distinguish "no __bytes__" from a lookup
       that raised. */
    else if (PyErr_Occurred())
        return NULL;
    else if (PyUnicode_Check(x)) {
        PyErr_SetString(PyExc_TypeError,
                        "string argument without an encoding");
        return NULL;
    }
    /* Is it an integer? */
    else if (_PyIndex_Check(x)) {
        size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
        if (size == -1 && PyErr_Occurred()) {
            /* Only a TypeError from the conversion is swallowed; the object
               is then handled by PyBytes_FromObject() instead. */
            if (!PyErr_ExceptionMatches(PyExc_TypeError))
                return NULL;
            PyErr_Clear();  /* fall through */
            bytes = PyBytes_FromObject(x);
        }
        else {
            if (size < 0) {
                PyErr_SetString(PyExc_ValueError, "negative count");
                return NULL;
            }
            /* NOTE(review): second argument presumably requests a
               zero-filled buffer -- confirm against _PyBytes_FromSize(). */
            bytes = _PyBytes_FromSize(size, 1);
        }
    }
    else {
        bytes = PyBytes_FromObject(x);
    }

    /* For any other type, replace the plain bytes result with whatever
       bytes_subtype_new() builds from it. */
    if (bytes != NULL && type != &PyBytes_Type) {
        Py_SETREF(bytes, bytes_subtype_new(type, bytes));
    }

    return bytes;
}
2672 
2673 static PyObject*
_PyBytes_FromBuffer(PyObject * x)2674 _PyBytes_FromBuffer(PyObject *x)
2675 {
2676     PyObject *new;
2677     Py_buffer view;
2678 
2679     if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2680         return NULL;
2681 
2682     new = PyBytes_FromStringAndSize(NULL, view.len);
2683     if (!new)
2684         goto fail;
2685     if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2686                 &view, view.len, 'C') < 0)
2687         goto fail;
2688     PyBuffer_Release(&view);
2689     return new;
2690 
2691 fail:
2692     Py_XDECREF(new);
2693     PyBuffer_Release(&view);
2694     return NULL;
2695 }
2696 
2697 static PyObject*
_PyBytes_FromList(PyObject * x)2698 _PyBytes_FromList(PyObject *x)
2699 {
2700     Py_ssize_t i, size = PyList_GET_SIZE(x);
2701     Py_ssize_t value;
2702     char *str;
2703     PyObject *item;
2704     _PyBytesWriter writer;
2705 
2706     _PyBytesWriter_Init(&writer);
2707     str = _PyBytesWriter_Alloc(&writer, size);
2708     if (str == NULL)
2709         return NULL;
2710     writer.overallocate = 1;
2711     size = writer.allocated;
2712 
2713     for (i = 0; i < PyList_GET_SIZE(x); i++) {
2714         item = PyList_GET_ITEM(x, i);
2715         Py_INCREF(item);
2716         value = PyNumber_AsSsize_t(item, NULL);
2717         Py_DECREF(item);
2718         if (value == -1 && PyErr_Occurred())
2719             goto error;
2720 
2721         if (value < 0 || value >= 256) {
2722             PyErr_SetString(PyExc_ValueError,
2723                             "bytes must be in range(0, 256)");
2724             goto error;
2725         }
2726 
2727         if (i >= size) {
2728             str = _PyBytesWriter_Resize(&writer, str, size+1);
2729             if (str == NULL)
2730                 return NULL;
2731             size = writer.allocated;
2732         }
2733         *str++ = (char) value;
2734     }
2735     return _PyBytesWriter_Finish(&writer, str);
2736 
2737   error:
2738     _PyBytesWriter_Dealloc(&writer);
2739     return NULL;
2740 }
2741 
2742 static PyObject*
_PyBytes_FromTuple(PyObject * x)2743 _PyBytes_FromTuple(PyObject *x)
2744 {
2745     PyObject *bytes;
2746     Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2747     Py_ssize_t value;
2748     char *str;
2749     PyObject *item;
2750 
2751     bytes = PyBytes_FromStringAndSize(NULL, size);
2752     if (bytes == NULL)
2753         return NULL;
2754     str = ((PyBytesObject *)bytes)->ob_sval;
2755 
2756     for (i = 0; i < size; i++) {
2757         item = PyTuple_GET_ITEM(x, i);
2758         value = PyNumber_AsSsize_t(item, NULL);
2759         if (value == -1 && PyErr_Occurred())
2760             goto error;
2761 
2762         if (value < 0 || value >= 256) {
2763             PyErr_SetString(PyExc_ValueError,
2764                             "bytes must be in range(0, 256)");
2765             goto error;
2766         }
2767         *str++ = (char) value;
2768     }
2769     return bytes;
2770 
2771   error:
2772     Py_DECREF(bytes);
2773     return NULL;
2774 }
2775 
2776 static PyObject *
_PyBytes_FromIterator(PyObject * it,PyObject * x)2777 _PyBytes_FromIterator(PyObject *it, PyObject *x)
2778 {
2779     char *str;
2780     Py_ssize_t i, size;
2781     _PyBytesWriter writer;
2782 
2783     /* For iterator version, create a bytes object and resize as needed */
2784     size = PyObject_LengthHint(x, 64);
2785     if (size == -1 && PyErr_Occurred())
2786         return NULL;
2787 
2788     _PyBytesWriter_Init(&writer);
2789     str = _PyBytesWriter_Alloc(&writer, size);
2790     if (str == NULL)
2791         return NULL;
2792     writer.overallocate = 1;
2793     size = writer.allocated;
2794 
2795     /* Run the iterator to exhaustion */
2796     for (i = 0; ; i++) {
2797         PyObject *item;
2798         Py_ssize_t value;
2799 
2800         /* Get the next item */
2801         item = PyIter_Next(it);
2802         if (item == NULL) {
2803             if (PyErr_Occurred())
2804                 goto error;
2805             break;
2806         }
2807 
2808         /* Interpret it as an int (__index__) */
2809         value = PyNumber_AsSsize_t(item, NULL);
2810         Py_DECREF(item);
2811         if (value == -1 && PyErr_Occurred())
2812             goto error;
2813 
2814         /* Range check */
2815         if (value < 0 || value >= 256) {
2816             PyErr_SetString(PyExc_ValueError,
2817                             "bytes must be in range(0, 256)");
2818             goto error;
2819         }
2820 
2821         /* Append the byte */
2822         if (i >= size) {
2823             str = _PyBytesWriter_Resize(&writer, str, size+1);
2824             if (str == NULL)
2825                 return NULL;
2826             size = writer.allocated;
2827         }
2828         *str++ = (char) value;
2829     }
2830 
2831     return _PyBytesWriter_Finish(&writer, str);
2832 
2833   error:
2834     _PyBytesWriter_Dealloc(&writer);
2835     return NULL;
2836 }
2837 
2838 PyObject *
PyBytes_FromObject(PyObject * x)2839 PyBytes_FromObject(PyObject *x)
2840 {
2841     PyObject *it, *result;
2842 
2843     if (x == NULL) {
2844         PyErr_BadInternalCall();
2845         return NULL;
2846     }
2847 
2848     if (PyBytes_CheckExact(x)) {
2849         Py_INCREF(x);
2850         return x;
2851     }
2852 
2853     /* Use the modern buffer interface */
2854     if (PyObject_CheckBuffer(x))
2855         return _PyBytes_FromBuffer(x);
2856 
2857     if (PyList_CheckExact(x))
2858         return _PyBytes_FromList(x);
2859 
2860     if (PyTuple_CheckExact(x))
2861         return _PyBytes_FromTuple(x);
2862 
2863     if (!PyUnicode_Check(x)) {
2864         it = PyObject_GetIter(x);
2865         if (it != NULL) {
2866             result = _PyBytes_FromIterator(it, x);
2867             Py_DECREF(it);
2868             return result;
2869         }
2870         if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2871             return NULL;
2872         }
2873     }
2874 
2875     PyErr_Format(PyExc_TypeError,
2876                  "cannot convert '%.200s' object to bytes",
2877                  Py_TYPE(x)->tp_name);
2878     return NULL;
2879 }
2880 
/* This allocator is needed for subclasses that don't want to use __new__.
2882  * See https://github.com/python/cpython/issues/91020#issuecomment-1096793239
2883  *
2884  * This allocator will be removed when ob_shash is removed.
2885  */
2886 static PyObject *
bytes_alloc(PyTypeObject * self,Py_ssize_t nitems)2887 bytes_alloc(PyTypeObject *self, Py_ssize_t nitems)
2888 {
2889     PyBytesObject *obj = (PyBytesObject*)PyType_GenericAlloc(self, nitems);
2890     if (obj == NULL) {
2891         return NULL;
2892     }
2893 _Py_COMP_DIAG_PUSH
2894 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
2895     obj->ob_shash = -1;
2896 _Py_COMP_DIAG_POP
2897     return (PyObject*)obj;
2898 }
2899 
2900 static PyObject *
bytes_subtype_new(PyTypeObject * type,PyObject * tmp)2901 bytes_subtype_new(PyTypeObject *type, PyObject *tmp)
2902 {
2903     PyObject *pnew;
2904     Py_ssize_t n;
2905 
2906     assert(PyType_IsSubtype(type, &PyBytes_Type));
2907     assert(PyBytes_Check(tmp));
2908     n = PyBytes_GET_SIZE(tmp);
2909     pnew = type->tp_alloc(type, n);
2910     if (pnew != NULL) {
2911         memcpy(PyBytes_AS_STRING(pnew),
2912                   PyBytes_AS_STRING(tmp), n+1);
2913 _Py_COMP_DIAG_PUSH
2914 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
2915         ((PyBytesObject *)pnew)->ob_shash =
2916             ((PyBytesObject *)tmp)->ob_shash;
2917 _Py_COMP_DIAG_POP
2918     }
2919     return pnew;
2920 }
2921 
/* Docstring of the bytes type; referenced as the tp_doc slot of
   PyBytes_Type. */
PyDoc_STRVAR(bytes_doc,
"bytes(iterable_of_ints) -> bytes\n\
bytes(string, encoding[, errors]) -> bytes\n\
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
bytes() -> empty bytes object\n\
\n\
Construct an immutable array of bytes from:\n\
  - an iterable yielding integers in range(256)\n\
  - a text string encoded using the specified encoding\n\
  - any object implementing the buffer API.\n\
  - an integer");
2934 
/* Defined with the bytes iterator further down; declared here because the
   type object below references it in its tp_iter slot. */
static PyObject *bytes_iter(PyObject *seq);

/* Type object for bytes.  The initializer is positional, so the order of
   the entries must follow the slot order named in the comments. */
PyTypeObject PyBytes_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes",                                    /* tp_name */
    PyBytesObject_SIZE,                         /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    0,                                          /* tp_dealloc */
    0,                                          /* tp_vectorcall_offset */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_as_async */
    (reprfunc)bytes_repr,                       /* tp_repr */
    &bytes_as_number,                           /* tp_as_number */
    &bytes_as_sequence,                         /* tp_as_sequence */
    &bytes_as_mapping,                          /* tp_as_mapping */
    (hashfunc)bytes_hash,                       /* tp_hash */
    0,                                          /* tp_call */
    bytes_str,                                  /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &bytes_as_buffer,                           /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_BYTES_SUBCLASS |
        _Py_TPFLAGS_MATCH_SELF,               /* tp_flags */
    bytes_doc,                                  /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)bytes_richcompare,             /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    bytes_iter,                                 /* tp_iter */
    0,                                          /* tp_iternext */
    bytes_methods,                              /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    bytes_alloc,                                /* tp_alloc */
    bytes_new,                                  /* tp_new */
    PyObject_Del,                               /* tp_free */
};
2980 
2981 void
PyBytes_Concat(PyObject ** pv,PyObject * w)2982 PyBytes_Concat(PyObject **pv, PyObject *w)
2983 {
2984     assert(pv != NULL);
2985     if (*pv == NULL)
2986         return;
2987     if (w == NULL) {
2988         Py_CLEAR(*pv);
2989         return;
2990     }
2991 
2992     if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2993         /* Only one reference, so we can resize in place */
2994         Py_ssize_t oldsize;
2995         Py_buffer wb;
2996 
2997         if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
2998             PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2999                          Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3000             Py_CLEAR(*pv);
3001             return;
3002         }
3003 
3004         oldsize = PyBytes_GET_SIZE(*pv);
3005         if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3006             PyErr_NoMemory();
3007             goto error;
3008         }
3009         if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3010             goto error;
3011 
3012         memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3013         PyBuffer_Release(&wb);
3014         return;
3015 
3016       error:
3017         PyBuffer_Release(&wb);
3018         Py_CLEAR(*pv);
3019         return;
3020     }
3021 
3022     else {
3023         /* Multiple references, need to create new object */
3024         PyObject *v;
3025         v = bytes_concat(*pv, w);
3026         Py_SETREF(*pv, v);
3027     }
3028 }
3029 
/* Same as PyBytes_Concat(pv, w), but additionally releases one reference
   to `w` afterwards (w may be NULL). */
void
PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
{
    PyBytes_Concat(pv, w);
    Py_XDECREF(w);
}
3036 
3037 
3038 /* The following function breaks the notion that bytes are immutable:
3039    it changes the size of a bytes object.  We get away with this only if there
3040    is only one module referencing the object.  You can also think of it
3041    as creating a new bytes object and destroying the old one, only
3042    more efficiently.  In any case, don't use this if the bytes object may
3043    already be known to some other part of the code...
3044    Note that if there's not enough memory to resize the bytes object, the
3045    original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
3046    memory" exception is set, and -1 is returned.  Else (on success) 0 is
3047    returned, and the value in *pv may or may not be the same as on input.
3048    As always, an extra byte is allocated for a trailing \0 byte (newsize
3049    does *not* include that), and a trailing \0 byte is stored.
3050 */
3051 
int
_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
{
    PyObject *v;
    PyBytesObject *sv;
    v = *pv;
    /* Not a bytes object, or a negative size: bad internal call */
    if (!PyBytes_Check(v) || newsize < 0) {
        goto error;
    }
    if (Py_SIZE(v) == newsize) {
        /* return early if newsize is equal to v->ob_size */
        return 0;
    }
    if (Py_SIZE(v) == 0) {
        /* NOTE(review): equal sizes were already handled above, so this
           inner newsize == 0 test looks unreachable. */
        if (newsize == 0) {
            return 0;
        }
        /* An empty object is not reallocated: build a fresh object of the
           requested size and drop the reference to the old one. */
        *pv = _PyBytes_FromSize(newsize, 0);
        Py_DECREF(v);
        return (*pv == NULL) ? -1 : 0;
    }
    /* In-place resizing is only allowed with exactly one reference */
    if (Py_REFCNT(v) != 1) {
        goto error;
    }
    if (newsize == 0) {
        /* Shrinking to nothing: hand back the object returned by
           bytes_new_empty() instead of reallocating. */
        *pv = bytes_new_empty();
        Py_DECREF(v);
        return 0;
    }
    /* XXX UNREF/NEWREF interface should be more symmetrical */
    /* Undo the debug bookkeeping for v before the memory block may move;
       _Py_NewReference() below is called on the reallocated block. */
#ifdef Py_REF_DEBUG
    _Py_RefTotal--;
#endif
#ifdef Py_TRACE_REFS
    _Py_ForgetReference(v);
#endif
    *pv = (PyObject *)
        PyObject_Realloc(v, PyBytesObject_SIZE + newsize);
    if (*pv == NULL) {
        /* Reallocation failed: free the old block, leave *pv == NULL */
        PyObject_Free(v);
        PyErr_NoMemory();
        return -1;
    }
    _Py_NewReference(*pv);
    sv = (PyBytesObject *) *pv;
    Py_SET_SIZE(sv, newsize);
    /* Store the trailing \0 byte right after the payload */
    sv->ob_sval[newsize] = '\0';
_Py_COMP_DIAG_PUSH
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
    sv->ob_shash = -1;          /* invalidate cached hash value */
_Py_COMP_DIAG_POP
    return 0;
error:
    /* Clear *pv, release the caller's reference and report the misuse */
    *pv = 0;
    Py_DECREF(v);
    PyErr_BadInternalCall();
    return -1;
}
3110 
3111 
3112 PyStatus
_PyBytes_InitTypes(PyInterpreterState * interp)3113 _PyBytes_InitTypes(PyInterpreterState *interp)
3114 {
3115     if (!_Py_IsMainInterpreter(interp)) {
3116         return _PyStatus_OK();
3117     }
3118 
3119     if (PyType_Ready(&PyBytes_Type) < 0) {
3120         return _PyStatus_ERR("Can't initialize bytes type");
3121     }
3122 
3123     if (PyType_Ready(&PyBytesIter_Type) < 0) {
3124         return _PyStatus_ERR("Can't initialize bytes iterator type");
3125     }
3126 
3127     return _PyStatus_OK();
3128 }
3129 
3130 
3131 /*********************** Bytes Iterator ****************************/
3132 
/* State of a bytes iterator. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;   /* index of the next byte to return */
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
3138 
/* tp_dealloc of the bytes iterator: untrack it from the GC, release the
   reference to the iterated bytes object (if any) and free the iterator. */
static void
striter_dealloc(striterobject *it)
{
    _PyObject_GC_UNTRACK(it);
    Py_XDECREF(it->it_seq);
    PyObject_GC_Del(it);
}
3146 
/* tp_traverse of the bytes iterator: the iterated bytes object is the only
   object reference it holds. */
static int
striter_traverse(striterobject *it, visitproc visit, void *arg)
{
    Py_VISIT(it->it_seq);
    return 0;
}
3153 
3154 static PyObject *
striter_next(striterobject * it)3155 striter_next(striterobject *it)
3156 {
3157     PyBytesObject *seq;
3158 
3159     assert(it != NULL);
3160     seq = it->it_seq;
3161     if (seq == NULL)
3162         return NULL;
3163     assert(PyBytes_Check(seq));
3164 
3165     if (it->it_index < PyBytes_GET_SIZE(seq)) {
3166         return _PyLong_FromUnsignedChar(
3167             (unsigned char)seq->ob_sval[it->it_index++]);
3168     }
3169 
3170     it->it_seq = NULL;
3171     Py_DECREF(seq);
3172     return NULL;
3173 }
3174 
3175 static PyObject *
striter_len(striterobject * it,PyObject * Py_UNUSED (ignored))3176 striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
3177 {
3178     Py_ssize_t len = 0;
3179     if (it->it_seq)
3180         len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3181     return PyLong_FromSsize_t(len);
3182 }
3183 
3184 PyDoc_STRVAR(length_hint_doc,
3185              "Private method returning an estimate of len(list(it)).");
3186 
3187 static PyObject *
striter_reduce(striterobject * it,PyObject * Py_UNUSED (ignored))3188 striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
3189 {
3190     PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter));
3191 
3192     /* _PyEval_GetBuiltin can invoke arbitrary code,
3193      * call must be before access of iterator pointers.
3194      * see issue #101765 */
3195 
3196     if (it->it_seq != NULL) {
3197         return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
3198     } else {
3199         return Py_BuildValue("N(())", iter);
3200     }
3201 }
3202 
3203 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3204 
3205 static PyObject *
striter_setstate(striterobject * it,PyObject * state)3206 striter_setstate(striterobject *it, PyObject *state)
3207 {
3208     Py_ssize_t index = PyLong_AsSsize_t(state);
3209     if (index == -1 && PyErr_Occurred())
3210         return NULL;
3211     if (it->it_seq != NULL) {
3212         if (index < 0)
3213             index = 0;
3214         else if (index > PyBytes_GET_SIZE(it->it_seq))
3215             index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3216         it->it_index = index;
3217     }
3218     Py_RETURN_NONE;
3219 }
3220 
3221 PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3222 
/* Method table of the bytes iterator (tp_methods of PyBytesIter_Type). */
static PyMethodDef striter_methods[] = {
    {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
     length_hint_doc},
    {"__reduce__",      (PyCFunction)striter_reduce, METH_NOARGS,
     reduce_doc},
    {"__setstate__",    (PyCFunction)striter_setstate, METH_O,
     setstate_doc},
    {NULL,              NULL}           /* sentinel */
};
3232 
/* Type object of the iterator returned by bytes_iter().  The initializer is
   positional; slots after the last listed entry are left zero-initialized. */
PyTypeObject PyBytesIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes_iterator",                           /* tp_name */
    sizeof(striterobject),                      /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)striter_dealloc,                /* tp_dealloc */
    0,                                          /* tp_vectorcall_offset */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_as_async */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)striter_traverse,     /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)striter_next,                 /* tp_iternext */
    striter_methods,                            /* tp_methods */
    0,                                          /* tp_members */
};
3265 
3266 static PyObject *
bytes_iter(PyObject * seq)3267 bytes_iter(PyObject *seq)
3268 {
3269     striterobject *it;
3270 
3271     if (!PyBytes_Check(seq)) {
3272         PyErr_BadInternalCall();
3273         return NULL;
3274     }
3275     it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3276     if (it == NULL)
3277         return NULL;
3278     it->it_index = 0;
3279     Py_INCREF(seq);
3280     it->it_seq = (PyBytesObject *)seq;
3281     _PyObject_GC_TRACK(it);
3282     return (PyObject *)it;
3283 }
3284 
3285 
3286 /* _PyBytesWriter API */
3287 
/* Divisor used by _PyBytesWriter_Resize(): when overallocation is enabled,
   the requested size is increased by size / OVERALLOCATE_FACTOR. */
#ifdef MS_WINDOWS
   /* On Windows, overallocate by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocate by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3295 
/* Initialize a writer: every field located before small_buffer is zeroed.
   When assertions are enabled (NDEBUG not defined), small_buffer is also
   filled with PYMEM_CLEANBYTE. */
void
_PyBytesWriter_Init(_PyBytesWriter *writer)
{
    /* Set all attributes before small_buffer to 0 */
    memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
#ifndef NDEBUG
    memset(writer->small_buffer, PYMEM_CLEANBYTE,
           sizeof(writer->small_buffer));
#endif
}
3306 
/* Release the object held in writer->buffer (if any) and reset the field. */
void
_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
{
    Py_CLEAR(writer->buffer);
}
3312 
3313 Py_LOCAL_INLINE(char*)
_PyBytesWriter_AsString(_PyBytesWriter * writer)3314 _PyBytesWriter_AsString(_PyBytesWriter *writer)
3315 {
3316     if (writer->use_small_buffer) {
3317         assert(writer->buffer == NULL);
3318         return writer->small_buffer;
3319     }
3320     else if (writer->use_bytearray) {
3321         assert(writer->buffer != NULL);
3322         return PyByteArray_AS_STRING(writer->buffer);
3323     }
3324     else {
3325         assert(writer->buffer != NULL);
3326         return PyBytes_AS_STRING(writer->buffer);
3327     }
3328 }
3329 
3330 Py_LOCAL_INLINE(Py_ssize_t)
_PyBytesWriter_GetSize(_PyBytesWriter * writer,char * str)3331 _PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
3332 {
3333     const char *start = _PyBytesWriter_AsString(writer);
3334     assert(str != NULL);
3335     assert(str >= start);
3336     assert(str - start <= writer->allocated);
3337     return str - start;
3338 }
3339 
#ifndef NDEBUG
/* Debug-only invariant check, meant to be wrapped in assert(): every
   violated invariant trips an assertion, otherwise 1 is returned.
   `str` is the current write position inside the writer's storage. */
Py_LOCAL_INLINE(int)
_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
{
    const char *start, *end;

    /* Exactly one storage is in use: the small buffer or a heap object of
       the expected exact type that nobody else references. */
    if (writer->use_small_buffer) {
        assert(writer->buffer == NULL);
    }
    else {
        assert(writer->buffer != NULL);
        if (writer->use_bytearray)
            assert(PyByteArray_CheckExact(writer->buffer));
        else
            assert(PyBytes_CheckExact(writer->buffer));
        assert(Py_REFCNT(writer->buffer) == 1);
    }

    if (writer->use_bytearray) {
        /* bytearray has its own overallocation algorithm,
           writer overallocation must be disabled */
        assert(!writer->overallocate);
    }

    assert(0 <= writer->allocated);
    assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
    /* the last byte must always be null */
    start = _PyBytesWriter_AsString(writer);
    assert(start[writer->allocated] == 0);

    /* The write position must lie within [start, start + allocated] */
    end = start + writer->allocated;
    assert(str != NULL);
    assert(start <= str && str <= end);
    return 1;
}
#endif
3376 
3377 void*
_PyBytesWriter_Resize(_PyBytesWriter * writer,void * str,Py_ssize_t size)3378 _PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3379 {
3380     Py_ssize_t allocated, pos;
3381 
3382     assert(_PyBytesWriter_CheckConsistency(writer, str));
3383     assert(writer->allocated < size);
3384 
3385     allocated = size;
3386     if (writer->overallocate
3387         && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3388         /* overallocate to limit the number of realloc() */
3389         allocated += allocated / OVERALLOCATE_FACTOR;
3390     }
3391 
3392     pos = _PyBytesWriter_GetSize(writer, str);
3393     if (!writer->use_small_buffer) {
3394         if (writer->use_bytearray) {
3395             if (PyByteArray_Resize(writer->buffer, allocated))
3396                 goto error;
3397             /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3398                but we cannot use ob_alloc because bytes may need to be moved
3399                to use the whole buffer. bytearray uses an internal optimization
3400                to avoid moving or copying bytes when bytes are removed at the
3401                beginning (ex: del bytearray[:1]). */
3402         }
3403         else {
3404             if (_PyBytes_Resize(&writer->buffer, allocated))
3405                 goto error;
3406         }
3407     }
3408     else {
3409         /* convert from stack buffer to bytes object buffer */
3410         assert(writer->buffer == NULL);
3411 
3412         if (writer->use_bytearray)
3413             writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3414         else
3415             writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
3416         if (writer->buffer == NULL)
3417             goto error;
3418 
3419         if (pos != 0) {
3420             char *dest;
3421             if (writer->use_bytearray)
3422                 dest = PyByteArray_AS_STRING(writer->buffer);
3423             else
3424                 dest = PyBytes_AS_STRING(writer->buffer);
3425             memcpy(dest,
3426                       writer->small_buffer,
3427                       pos);
3428         }
3429 
3430         writer->use_small_buffer = 0;
3431 #ifndef NDEBUG
3432         memset(writer->small_buffer, PYMEM_CLEANBYTE,
3433                sizeof(writer->small_buffer));
3434 #endif
3435     }
3436     writer->allocated = allocated;
3437 
3438     str = _PyBytesWriter_AsString(writer) + pos;
3439     assert(_PyBytesWriter_CheckConsistency(writer, str));
3440     return str;
3441 
3442 error:
3443     _PyBytesWriter_Dealloc(writer);
3444     return NULL;
3445 }
3446 
3447 void*
_PyBytesWriter_Prepare(_PyBytesWriter * writer,void * str,Py_ssize_t size)3448 _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3449 {
3450     Py_ssize_t new_min_size;
3451 
3452     assert(_PyBytesWriter_CheckConsistency(writer, str));
3453     assert(size >= 0);
3454 
3455     if (size == 0) {
3456         /* nothing to do */
3457         return str;
3458     }
3459 
3460     if (writer->min_size > PY_SSIZE_T_MAX - size) {
3461         PyErr_NoMemory();
3462         _PyBytesWriter_Dealloc(writer);
3463         return NULL;
3464     }
3465     new_min_size = writer->min_size + size;
3466 
3467     if (new_min_size > writer->allocated)
3468         str = _PyBytesWriter_Resize(writer, str, new_min_size);
3469 
3470     writer->min_size = new_min_size;
3471     return str;
3472 }
3473 
/* Allocate the buffer to write size bytes.
   Return the pointer to the beginning of buffer data.
   Raise an exception and return NULL on error.

   The writer always starts on its embedded small_buffer; the final call to
   _PyBytesWriter_Prepare() moves to a heap object when size exceeds the
   capacity recorded in writer->allocated. */
void*
_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
{
    /* ensure that _PyBytesWriter_Alloc() is only called once */
    assert(writer->min_size == 0 && writer->buffer == NULL);
    assert(size >= 0);

    writer->use_small_buffer = 1;
#ifndef NDEBUG
    /* Reserve the last byte of the small buffer for the null terminator */
    writer->allocated = sizeof(writer->small_buffer) - 1;
    /* In debug mode, don't use the full small buffer because it is less
       efficient than bytes and bytearray objects to detect buffer underflow
       and buffer overflow. Use 10 bytes of the small buffer to test also
       code using the smaller buffer in debug mode.

       Don't modify the _PyBytesWriter structure (use a shorter small buffer)
       in debug mode to also be able to detect stack overflow when running
       tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
       if _Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
       stack overflow. */
    writer->allocated = Py_MIN(writer->allocated, 10);
    /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
       to detect buffer overflow */
    writer->small_buffer[writer->allocated] = 0;
#else
    /* Without assertions the whole small buffer is usable */
    writer->allocated = sizeof(writer->small_buffer);
#endif
    return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
}
3506 
3507 PyObject *
_PyBytesWriter_Finish(_PyBytesWriter * writer,void * str)3508 _PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
3509 {
3510     Py_ssize_t size;
3511     PyObject *result;
3512 
3513     assert(_PyBytesWriter_CheckConsistency(writer, str));
3514 
3515     size = _PyBytesWriter_GetSize(writer, str);
3516     if (size == 0 && !writer->use_bytearray) {
3517         Py_CLEAR(writer->buffer);
3518         /* Get the empty byte string singleton */
3519         result = PyBytes_FromStringAndSize(NULL, 0);
3520     }
3521     else if (writer->use_small_buffer) {
3522         if (writer->use_bytearray) {
3523             result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3524         }
3525         else {
3526             result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3527         }
3528     }
3529     else {
3530         result = writer->buffer;
3531         writer->buffer = NULL;
3532 
3533         if (size != writer->allocated) {
3534             if (writer->use_bytearray) {
3535                 if (PyByteArray_Resize(result, size)) {
3536                     Py_DECREF(result);
3537                     return NULL;
3538                 }
3539             }
3540             else {
3541                 if (_PyBytes_Resize(&result, size)) {
3542                     assert(result == NULL);
3543                     return NULL;
3544                 }
3545             }
3546         }
3547     }
3548     return result;
3549 }
3550 
3551 void*
_PyBytesWriter_WriteBytes(_PyBytesWriter * writer,void * ptr,const void * bytes,Py_ssize_t size)3552 _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
3553                           const void *bytes, Py_ssize_t size)
3554 {
3555     char *str = (char *)ptr;
3556 
3557     str = _PyBytesWriter_Prepare(writer, str, size);
3558     if (str == NULL)
3559         return NULL;
3560 
3561     memcpy(str, bytes, size);
3562     str += size;
3563 
3564     return str;
3565 }
3566 
3567 
3568 void
_PyBytes_Repeat(char * dest,Py_ssize_t len_dest,const char * src,Py_ssize_t len_src)3569 _PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
3570     const char* src, Py_ssize_t len_src)
3571 {
3572     if (len_dest == 0) {
3573         return;
3574     }
3575     if (len_src == 1) {
3576         memset(dest, src[0], len_dest);
3577     }
3578     else {
3579         if (src != dest) {
3580             memcpy(dest, src, len_src);
3581         }
3582         Py_ssize_t copied = len_src;
3583         while (copied < len_dest) {
3584             Py_ssize_t bytes_to_copy = Py_MIN(copied, len_dest - copied);
3585             memcpy(dest + copied, dest, bytes_to_copy);
3586             copied += bytes_to_copy;
3587         }
3588     }
3589 }
3590 
3591