1 /* bytes object implementation */
2
3 #define PY_SSIZE_T_CLEAN
4
5 #include "Python.h"
6 #include "pycore_abstract.h" // _PyIndex_Check()
7 #include "pycore_bytesobject.h" // _PyBytes_Find(), _PyBytes_Repeat()
8 #include "pycore_bytes_methods.h" // _Py_bytes_startswith()
9 #include "pycore_call.h" // _PyObject_CallNoArgs()
10 #include "pycore_format.h" // F_LJUST
11 #include "pycore_global_objects.h" // _Py_GET_GLOBAL_OBJECT()
12 #include "pycore_initconfig.h" // _PyStatus_OK()
13 #include "pycore_long.h" // _PyLong_DigitValue
14 #include "pycore_object.h" // _PyObject_GC_TRACK
15 #include "pycore_pymem.h" // PYMEM_CLEANBYTE
16 #include "pycore_strhex.h" // _Py_strhex_with_sep()
17
18 #include <stddef.h>
19
20 /*[clinic input]
21 class bytes "PyBytesObject *" "&PyBytes_Type"
22 [clinic start generated code]*/
23 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
24
25 #include "clinic/bytesobject.c.h"
26
27 /* PyBytesObject_SIZE gives the basic size of a bytes object; any memory allocation
28 for a bytes object of length n should request PyBytesObject_SIZE + n bytes.
29
30 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
31 3 or 7 bytes per bytes object allocation on a typical system.
32 */
33 #define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
34
35 /* Forward declaration */
36 Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
37 char *str);
38
39
/* Shorthands for the bytes singletons reached through _Py_SINGLETON():
   the table of one-byte objects indexed by byte value, and the empty
   bytes object. */
#define CHARACTERS _Py_SINGLETON(bytes_characters)
/* Expands to a plain expression (no trailing semicolon), so it can be used
   wherever an expression is allowed.  Callers in this file pass a value
   masked with 255. */
#define CHARACTER(ch) \
    ((PyBytesObject *)&(CHARACTERS[ch]))
#define EMPTY (&_Py_SINGLETON(bytes_empty))
44
45
46 // Return a borrowed reference to the empty bytes string singleton.
bytes_get_empty(void)47 static inline PyObject* bytes_get_empty(void)
48 {
49 return &EMPTY->ob_base.ob_base;
50 }
51
52
53 // Return a strong reference to the empty bytes string singleton.
bytes_new_empty(void)54 static inline PyObject* bytes_new_empty(void)
55 {
56 Py_INCREF(EMPTY);
57 return (PyObject *)EMPTY;
58 }
59
60
61 /*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string; its length, not counting the terminator, becomes the size of the
   resulting object.
64
65 For PyBytes_FromStringAndSize(), the parameter `str' is
66 either NULL or else points to a string containing at least `size' bytes.
67 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
68 not have to be null-terminated. (Therefore it is safe to construct a
69 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
70 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
71 bytes (setting the last byte to the null terminating character) and you can
72 fill in the data yourself. If `str' is non-NULL then the resulting
73 PyBytes object must be treated as immutable and you must not fill in nor
74 alter the data yourself, since the strings may be shared.
75
76 The PyObject member `op->ob_size', which denotes the number of "extra
77 items" in a variable-size object, will contain the number of bytes
78 allocated for string data, not counting the null terminating character.
79 It is therefore equal to the `size' parameter (for
80 PyBytes_FromStringAndSize()) or the length of the string in the `str'
81 parameter (for PyBytes_FromString()).
82 */
83 static PyObject *
_PyBytes_FromSize(Py_ssize_t size,int use_calloc)84 _PyBytes_FromSize(Py_ssize_t size, int use_calloc)
85 {
86 PyBytesObject *op;
87 assert(size >= 0);
88
89 if (size == 0) {
90 return bytes_new_empty();
91 }
92
93 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
94 PyErr_SetString(PyExc_OverflowError,
95 "byte string is too large");
96 return NULL;
97 }
98
99 /* Inline PyObject_NewVar */
100 if (use_calloc)
101 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
102 else
103 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
104 if (op == NULL) {
105 return PyErr_NoMemory();
106 }
107 _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
108 _Py_COMP_DIAG_PUSH
109 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
110 op->ob_shash = -1;
111 _Py_COMP_DIAG_POP
112 if (!use_calloc) {
113 op->ob_sval[size] = '\0';
114 }
115 return (PyObject *) op;
116 }
117
118 PyObject *
PyBytes_FromStringAndSize(const char * str,Py_ssize_t size)119 PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
120 {
121 PyBytesObject *op;
122 if (size < 0) {
123 PyErr_SetString(PyExc_SystemError,
124 "Negative size passed to PyBytes_FromStringAndSize");
125 return NULL;
126 }
127 if (size == 1 && str != NULL) {
128 op = CHARACTER(*str & 255);
129 Py_INCREF(op);
130 return (PyObject *)op;
131 }
132 if (size == 0) {
133 return bytes_new_empty();
134 }
135
136 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
137 if (op == NULL)
138 return NULL;
139 if (str == NULL)
140 return (PyObject *) op;
141
142 memcpy(op->ob_sval, str, size);
143 return (PyObject *) op;
144 }
145
146 PyObject *
PyBytes_FromString(const char * str)147 PyBytes_FromString(const char *str)
148 {
149 size_t size;
150 PyBytesObject *op;
151
152 assert(str != NULL);
153 size = strlen(str);
154 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
155 PyErr_SetString(PyExc_OverflowError,
156 "byte string is too long");
157 return NULL;
158 }
159
160 if (size == 0) {
161 return bytes_new_empty();
162 }
163 else if (size == 1) {
164 op = CHARACTER(*str & 255);
165 Py_INCREF(op);
166 return (PyObject *)op;
167 }
168
169 /* Inline PyObject_NewVar */
170 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
171 if (op == NULL) {
172 return PyErr_NoMemory();
173 }
174 _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
175 _Py_COMP_DIAG_PUSH
176 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
177 op->ob_shash = -1;
178 _Py_COMP_DIAG_POP
179 memcpy(op->ob_sval, str, size+1);
180 return (PyObject *) op;
181 }
182
183 PyObject *
PyBytes_FromFormatV(const char * format,va_list vargs)184 PyBytes_FromFormatV(const char *format, va_list vargs)
185 {
186 char *s;
187 const char *f;
188 const char *p;
189 Py_ssize_t prec;
190 int longflag;
191 int size_tflag;
192 /* Longest 64-bit formatted numbers:
193 - "18446744073709551615\0" (21 bytes)
194 - "-9223372036854775808\0" (21 bytes)
195 Decimal takes the most space (it isn't enough for octal.)
196
197 Longest 64-bit pointer representation:
198 "0xffffffffffffffff\0" (19 bytes). */
199 char buffer[21];
200 _PyBytesWriter writer;
201
202 _PyBytesWriter_Init(&writer);
203
204 s = _PyBytesWriter_Alloc(&writer, strlen(format));
205 if (s == NULL)
206 return NULL;
207 writer.overallocate = 1;
208
209 #define WRITE_BYTES(str) \
210 do { \
211 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
212 if (s == NULL) \
213 goto error; \
214 } while (0)
215
216 for (f = format; *f; f++) {
217 if (*f != '%') {
218 *s++ = *f;
219 continue;
220 }
221
222 p = f++;
223
224 /* ignore the width (ex: 10 in "%10s") */
225 while (Py_ISDIGIT(*f))
226 f++;
227
228 /* parse the precision (ex: 10 in "%.10s") */
229 prec = 0;
230 if (*f == '.') {
231 f++;
232 for (; Py_ISDIGIT(*f); f++) {
233 prec = (prec * 10) + (*f - '0');
234 }
235 }
236
237 while (*f && *f != '%' && !Py_ISALPHA(*f))
238 f++;
239
240 /* handle the long flag ('l'), but only for %ld and %lu.
241 others can be added when necessary. */
242 longflag = 0;
243 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
244 longflag = 1;
245 ++f;
246 }
247
248 /* handle the size_t flag ('z'). */
249 size_tflag = 0;
250 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
251 size_tflag = 1;
252 ++f;
253 }
254
255 /* subtract bytes preallocated for the format string
256 (ex: 2 for "%s") */
257 writer.min_size -= (f - p + 1);
258
259 switch (*f) {
260 case 'c':
261 {
262 int c = va_arg(vargs, int);
263 if (c < 0 || c > 255) {
264 PyErr_SetString(PyExc_OverflowError,
265 "PyBytes_FromFormatV(): %c format "
266 "expects an integer in range [0; 255]");
267 goto error;
268 }
269 writer.min_size++;
270 *s++ = (unsigned char)c;
271 break;
272 }
273
274 case 'd':
275 if (longflag) {
276 sprintf(buffer, "%ld", va_arg(vargs, long));
277 }
278 else if (size_tflag) {
279 sprintf(buffer, "%zd", va_arg(vargs, Py_ssize_t));
280 }
281 else {
282 sprintf(buffer, "%d", va_arg(vargs, int));
283 }
284 assert(strlen(buffer) < sizeof(buffer));
285 WRITE_BYTES(buffer);
286 break;
287
288 case 'u':
289 if (longflag) {
290 sprintf(buffer, "%lu", va_arg(vargs, unsigned long));
291 }
292 else if (size_tflag) {
293 sprintf(buffer, "%zu", va_arg(vargs, size_t));
294 }
295 else {
296 sprintf(buffer, "%u", va_arg(vargs, unsigned int));
297 }
298 assert(strlen(buffer) < sizeof(buffer));
299 WRITE_BYTES(buffer);
300 break;
301
302 case 'i':
303 sprintf(buffer, "%i", va_arg(vargs, int));
304 assert(strlen(buffer) < sizeof(buffer));
305 WRITE_BYTES(buffer);
306 break;
307
308 case 'x':
309 sprintf(buffer, "%x", va_arg(vargs, int));
310 assert(strlen(buffer) < sizeof(buffer));
311 WRITE_BYTES(buffer);
312 break;
313
314 case 's':
315 {
316 Py_ssize_t i;
317
318 p = va_arg(vargs, const char*);
319 if (prec <= 0) {
320 i = strlen(p);
321 }
322 else {
323 i = 0;
324 while (i < prec && p[i]) {
325 i++;
326 }
327 }
328 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
329 if (s == NULL)
330 goto error;
331 break;
332 }
333
334 case 'p':
335 sprintf(buffer, "%p", va_arg(vargs, void*));
336 assert(strlen(buffer) < sizeof(buffer));
337 /* %p is ill-defined: ensure leading 0x. */
338 if (buffer[1] == 'X')
339 buffer[1] = 'x';
340 else if (buffer[1] != 'x') {
341 memmove(buffer+2, buffer, strlen(buffer)+1);
342 buffer[0] = '0';
343 buffer[1] = 'x';
344 }
345 WRITE_BYTES(buffer);
346 break;
347
348 case '%':
349 writer.min_size++;
350 *s++ = '%';
351 break;
352
353 default:
354 if (*f == 0) {
355 /* fix min_size if we reached the end of the format string */
356 writer.min_size++;
357 }
358
359 /* invalid format string: copy unformatted string and exit */
360 WRITE_BYTES(p);
361 return _PyBytesWriter_Finish(&writer, s);
362 }
363 }
364
365 #undef WRITE_BYTES
366
367 return _PyBytesWriter_Finish(&writer, s);
368
369 error:
370 _PyBytesWriter_Dealloc(&writer);
371 return NULL;
372 }
373
374 PyObject *
PyBytes_FromFormat(const char * format,...)375 PyBytes_FromFormat(const char *format, ...)
376 {
377 PyObject* ret;
378 va_list vargs;
379
380 #ifdef HAVE_STDARG_PROTOTYPES
381 va_start(vargs, format);
382 #else
383 va_start(vargs);
384 #endif
385 ret = PyBytes_FromFormatV(format, vargs);
386 va_end(vargs);
387 return ret;
388 }
389
390 /* Helpers for formatstring */
391
392 Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject * args,Py_ssize_t arglen,Py_ssize_t * p_argidx)393 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
394 {
395 Py_ssize_t argidx = *p_argidx;
396 if (argidx < arglen) {
397 (*p_argidx)++;
398 if (arglen < 0)
399 return args;
400 else
401 return PyTuple_GetItem(args, argidx);
402 }
403 PyErr_SetString(PyExc_TypeError,
404 "not enough arguments for format string");
405 return NULL;
406 }
407
408 /* Returns a new reference to a PyBytes object, or NULL on failure. */
409
410 static char*
formatfloat(PyObject * v,int flags,int prec,int type,PyObject ** p_result,_PyBytesWriter * writer,char * str)411 formatfloat(PyObject *v, int flags, int prec, int type,
412 PyObject **p_result, _PyBytesWriter *writer, char *str)
413 {
414 char *p;
415 PyObject *result;
416 double x;
417 size_t len;
418 int dtoa_flags = 0;
419
420 x = PyFloat_AsDouble(v);
421 if (x == -1.0 && PyErr_Occurred()) {
422 PyErr_Format(PyExc_TypeError, "float argument required, "
423 "not %.200s", Py_TYPE(v)->tp_name);
424 return NULL;
425 }
426
427 if (prec < 0)
428 prec = 6;
429
430 if (flags & F_ALT) {
431 dtoa_flags |= Py_DTSF_ALT;
432 }
433 p = PyOS_double_to_string(x, type, prec, dtoa_flags, NULL);
434
435 if (p == NULL)
436 return NULL;
437
438 len = strlen(p);
439 if (writer != NULL) {
440 str = _PyBytesWriter_Prepare(writer, str, len);
441 if (str == NULL) {
442 PyMem_Free(p);
443 return NULL;
444 }
445 memcpy(str, p, len);
446 PyMem_Free(p);
447 str += len;
448 return str;
449 }
450
451 result = PyBytes_FromStringAndSize(p, len);
452 PyMem_Free(p);
453 *p_result = result;
454 return result != NULL ? str : NULL;
455 }
456
457 static PyObject *
formatlong(PyObject * v,int flags,int prec,int type)458 formatlong(PyObject *v, int flags, int prec, int type)
459 {
460 PyObject *result, *iobj;
461 if (type == 'i')
462 type = 'd';
463 if (PyLong_Check(v))
464 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
465 if (PyNumber_Check(v)) {
466 /* make sure number is a type of integer for o, x, and X */
467 if (type == 'o' || type == 'x' || type == 'X')
468 iobj = _PyNumber_Index(v);
469 else
470 iobj = PyNumber_Long(v);
471 if (iobj != NULL) {
472 assert(PyLong_Check(iobj));
473 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
474 Py_DECREF(iobj);
475 return result;
476 }
477 if (!PyErr_ExceptionMatches(PyExc_TypeError))
478 return NULL;
479 }
480 PyErr_Format(PyExc_TypeError,
481 "%%%c format: %s is required, not %.200s", type,
482 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
483 : "a real number",
484 Py_TYPE(v)->tp_name);
485 return NULL;
486 }
487
488 static int
byte_converter(PyObject * arg,char * p)489 byte_converter(PyObject *arg, char *p)
490 {
491 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
492 *p = PyBytes_AS_STRING(arg)[0];
493 return 1;
494 }
495 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
496 *p = PyByteArray_AS_STRING(arg)[0];
497 return 1;
498 }
499 else {
500 int overflow;
501 long ival = PyLong_AsLongAndOverflow(arg, &overflow);
502 if (ival == -1 && PyErr_Occurred()) {
503 if (PyErr_ExceptionMatches(PyExc_TypeError)) {
504 goto onError;
505 }
506 return 0;
507 }
508 if (!(0 <= ival && ival <= 255)) {
509 /* this includes an overflow in converting to C long */
510 PyErr_SetString(PyExc_OverflowError,
511 "%c arg not in range(256)");
512 return 0;
513 }
514 *p = (char)ival;
515 return 1;
516 }
517 onError:
518 PyErr_SetString(PyExc_TypeError,
519 "%c requires an integer in range(256) or a single byte");
520 return 0;
521 }
522
523 static PyObject *_PyBytes_FromBuffer(PyObject *x);
524
525 static PyObject *
format_obj(PyObject * v,const char ** pbuf,Py_ssize_t * plen)526 format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
527 {
528 PyObject *func, *result;
529 /* is it a bytes object? */
530 if (PyBytes_Check(v)) {
531 *pbuf = PyBytes_AS_STRING(v);
532 *plen = PyBytes_GET_SIZE(v);
533 Py_INCREF(v);
534 return v;
535 }
536 if (PyByteArray_Check(v)) {
537 *pbuf = PyByteArray_AS_STRING(v);
538 *plen = PyByteArray_GET_SIZE(v);
539 Py_INCREF(v);
540 return v;
541 }
542 /* does it support __bytes__? */
543 func = _PyObject_LookupSpecial(v, &_Py_ID(__bytes__));
544 if (func != NULL) {
545 result = _PyObject_CallNoArgs(func);
546 Py_DECREF(func);
547 if (result == NULL)
548 return NULL;
549 if (!PyBytes_Check(result)) {
550 PyErr_Format(PyExc_TypeError,
551 "__bytes__ returned non-bytes (type %.200s)",
552 Py_TYPE(result)->tp_name);
553 Py_DECREF(result);
554 return NULL;
555 }
556 *pbuf = PyBytes_AS_STRING(result);
557 *plen = PyBytes_GET_SIZE(result);
558 return result;
559 }
560 /* does it support buffer protocol? */
561 if (PyObject_CheckBuffer(v)) {
562 /* maybe we can avoid making a copy of the buffer object here? */
563 result = _PyBytes_FromBuffer(v);
564 if (result == NULL)
565 return NULL;
566 *pbuf = PyBytes_AS_STRING(result);
567 *plen = PyBytes_GET_SIZE(result);
568 return result;
569 }
570 PyErr_Format(PyExc_TypeError,
571 "%%b requires a bytes-like object, "
572 "or an object that implements __bytes__, not '%.100s'",
573 Py_TYPE(v)->tp_name);
574 return NULL;
575 }
576
577 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
578
579 PyObject *
_PyBytes_FormatEx(const char * format,Py_ssize_t format_len,PyObject * args,int use_bytearray)580 _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
581 PyObject *args, int use_bytearray)
582 {
583 const char *fmt;
584 char *res;
585 Py_ssize_t arglen, argidx;
586 Py_ssize_t fmtcnt;
587 int args_owned = 0;
588 PyObject *dict = NULL;
589 _PyBytesWriter writer;
590
591 if (args == NULL) {
592 PyErr_BadInternalCall();
593 return NULL;
594 }
595 fmt = format;
596 fmtcnt = format_len;
597
598 _PyBytesWriter_Init(&writer);
599 writer.use_bytearray = use_bytearray;
600
601 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
602 if (res == NULL)
603 return NULL;
604 if (!use_bytearray)
605 writer.overallocate = 1;
606
607 if (PyTuple_Check(args)) {
608 arglen = PyTuple_GET_SIZE(args);
609 argidx = 0;
610 }
611 else {
612 arglen = -1;
613 argidx = -2;
614 }
615 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
616 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
617 !PyByteArray_Check(args)) {
618 dict = args;
619 }
620
621 while (--fmtcnt >= 0) {
622 if (*fmt != '%') {
623 Py_ssize_t len;
624 char *pos;
625
626 pos = (char *)memchr(fmt + 1, '%', fmtcnt);
627 if (pos != NULL)
628 len = pos - fmt;
629 else
630 len = fmtcnt + 1;
631 assert(len != 0);
632
633 memcpy(res, fmt, len);
634 res += len;
635 fmt += len;
636 fmtcnt -= (len - 1);
637 }
638 else {
639 /* Got a format specifier */
640 int flags = 0;
641 Py_ssize_t width = -1;
642 int prec = -1;
643 int c = '\0';
644 int fill;
645 PyObject *v = NULL;
646 PyObject *temp = NULL;
647 const char *pbuf = NULL;
648 int sign;
649 Py_ssize_t len = 0;
650 char onechar; /* For byte_converter() */
651 Py_ssize_t alloc;
652
653 fmt++;
654 if (*fmt == '%') {
655 *res++ = '%';
656 fmt++;
657 fmtcnt--;
658 continue;
659 }
660 if (*fmt == '(') {
661 const char *keystart;
662 Py_ssize_t keylen;
663 PyObject *key;
664 int pcount = 1;
665
666 if (dict == NULL) {
667 PyErr_SetString(PyExc_TypeError,
668 "format requires a mapping");
669 goto error;
670 }
671 ++fmt;
672 --fmtcnt;
673 keystart = fmt;
674 /* Skip over balanced parentheses */
675 while (pcount > 0 && --fmtcnt >= 0) {
676 if (*fmt == ')')
677 --pcount;
678 else if (*fmt == '(')
679 ++pcount;
680 fmt++;
681 }
682 keylen = fmt - keystart - 1;
683 if (fmtcnt < 0 || pcount > 0) {
684 PyErr_SetString(PyExc_ValueError,
685 "incomplete format key");
686 goto error;
687 }
688 key = PyBytes_FromStringAndSize(keystart,
689 keylen);
690 if (key == NULL)
691 goto error;
692 if (args_owned) {
693 Py_DECREF(args);
694 args_owned = 0;
695 }
696 args = PyObject_GetItem(dict, key);
697 Py_DECREF(key);
698 if (args == NULL) {
699 goto error;
700 }
701 args_owned = 1;
702 arglen = -1;
703 argidx = -2;
704 }
705
706 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
707 while (--fmtcnt >= 0) {
708 switch (c = *fmt++) {
709 case '-': flags |= F_LJUST; continue;
710 case '+': flags |= F_SIGN; continue;
711 case ' ': flags |= F_BLANK; continue;
712 case '#': flags |= F_ALT; continue;
713 case '0': flags |= F_ZERO; continue;
714 }
715 break;
716 }
717
718 /* Parse width. Example: "%10s" => width=10 */
719 if (c == '*') {
720 v = getnextarg(args, arglen, &argidx);
721 if (v == NULL)
722 goto error;
723 if (!PyLong_Check(v)) {
724 PyErr_SetString(PyExc_TypeError,
725 "* wants int");
726 goto error;
727 }
728 width = PyLong_AsSsize_t(v);
729 if (width == -1 && PyErr_Occurred())
730 goto error;
731 if (width < 0) {
732 flags |= F_LJUST;
733 width = -width;
734 }
735 if (--fmtcnt >= 0)
736 c = *fmt++;
737 }
738 else if (c >= 0 && isdigit(c)) {
739 width = c - '0';
740 while (--fmtcnt >= 0) {
741 c = Py_CHARMASK(*fmt++);
742 if (!isdigit(c))
743 break;
744 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
745 PyErr_SetString(
746 PyExc_ValueError,
747 "width too big");
748 goto error;
749 }
750 width = width*10 + (c - '0');
751 }
752 }
753
754 /* Parse precision. Example: "%.3f" => prec=3 */
755 if (c == '.') {
756 prec = 0;
757 if (--fmtcnt >= 0)
758 c = *fmt++;
759 if (c == '*') {
760 v = getnextarg(args, arglen, &argidx);
761 if (v == NULL)
762 goto error;
763 if (!PyLong_Check(v)) {
764 PyErr_SetString(
765 PyExc_TypeError,
766 "* wants int");
767 goto error;
768 }
769 prec = _PyLong_AsInt(v);
770 if (prec == -1 && PyErr_Occurred())
771 goto error;
772 if (prec < 0)
773 prec = 0;
774 if (--fmtcnt >= 0)
775 c = *fmt++;
776 }
777 else if (c >= 0 && isdigit(c)) {
778 prec = c - '0';
779 while (--fmtcnt >= 0) {
780 c = Py_CHARMASK(*fmt++);
781 if (!isdigit(c))
782 break;
783 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
784 PyErr_SetString(
785 PyExc_ValueError,
786 "prec too big");
787 goto error;
788 }
789 prec = prec*10 + (c - '0');
790 }
791 }
792 } /* prec */
793 if (fmtcnt >= 0) {
794 if (c == 'h' || c == 'l' || c == 'L') {
795 if (--fmtcnt >= 0)
796 c = *fmt++;
797 }
798 }
799 if (fmtcnt < 0) {
800 PyErr_SetString(PyExc_ValueError,
801 "incomplete format");
802 goto error;
803 }
804 v = getnextarg(args, arglen, &argidx);
805 if (v == NULL)
806 goto error;
807
808 if (fmtcnt == 0) {
809 /* last write: disable writer overallocation */
810 writer.overallocate = 0;
811 }
812
813 sign = 0;
814 fill = ' ';
815 switch (c) {
816 case 'r':
817 // %r is only for 2/3 code; 3 only code should use %a
818 case 'a':
819 temp = PyObject_ASCII(v);
820 if (temp == NULL)
821 goto error;
822 assert(PyUnicode_IS_ASCII(temp));
823 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
824 len = PyUnicode_GET_LENGTH(temp);
825 if (prec >= 0 && len > prec)
826 len = prec;
827 break;
828
829 case 's':
830 // %s is only for 2/3 code; 3 only code should use %b
831 case 'b':
832 temp = format_obj(v, &pbuf, &len);
833 if (temp == NULL)
834 goto error;
835 if (prec >= 0 && len > prec)
836 len = prec;
837 break;
838
839 case 'i':
840 case 'd':
841 case 'u':
842 case 'o':
843 case 'x':
844 case 'X':
845 if (PyLong_CheckExact(v)
846 && width == -1 && prec == -1
847 && !(flags & (F_SIGN | F_BLANK))
848 && c != 'X')
849 {
850 /* Fast path */
851 int alternate = flags & F_ALT;
852 int base;
853
854 switch(c)
855 {
856 default:
857 Py_UNREACHABLE();
858 case 'd':
859 case 'i':
860 case 'u':
861 base = 10;
862 break;
863 case 'o':
864 base = 8;
865 break;
866 case 'x':
867 case 'X':
868 base = 16;
869 break;
870 }
871
872 /* Fast path */
873 writer.min_size -= 2; /* size preallocated for "%d" */
874 res = _PyLong_FormatBytesWriter(&writer, res,
875 v, base, alternate);
876 if (res == NULL)
877 goto error;
878 continue;
879 }
880
881 temp = formatlong(v, flags, prec, c);
882 if (!temp)
883 goto error;
884 assert(PyUnicode_IS_ASCII(temp));
885 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
886 len = PyUnicode_GET_LENGTH(temp);
887 sign = 1;
888 if (flags & F_ZERO)
889 fill = '0';
890 break;
891
892 case 'e':
893 case 'E':
894 case 'f':
895 case 'F':
896 case 'g':
897 case 'G':
898 if (width == -1 && prec == -1
899 && !(flags & (F_SIGN | F_BLANK)))
900 {
901 /* Fast path */
902 writer.min_size -= 2; /* size preallocated for "%f" */
903 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
904 if (res == NULL)
905 goto error;
906 continue;
907 }
908
909 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
910 goto error;
911 pbuf = PyBytes_AS_STRING(temp);
912 len = PyBytes_GET_SIZE(temp);
913 sign = 1;
914 if (flags & F_ZERO)
915 fill = '0';
916 break;
917
918 case 'c':
919 pbuf = &onechar;
920 len = byte_converter(v, &onechar);
921 if (!len)
922 goto error;
923 if (width == -1) {
924 /* Fast path */
925 *res++ = onechar;
926 continue;
927 }
928 break;
929
930 default:
931 PyErr_Format(PyExc_ValueError,
932 "unsupported format character '%c' (0x%x) "
933 "at index %zd",
934 c, c,
935 (Py_ssize_t)(fmt - 1 - format));
936 goto error;
937 }
938
939 if (sign) {
940 if (*pbuf == '-' || *pbuf == '+') {
941 sign = *pbuf++;
942 len--;
943 }
944 else if (flags & F_SIGN)
945 sign = '+';
946 else if (flags & F_BLANK)
947 sign = ' ';
948 else
949 sign = 0;
950 }
951 if (width < len)
952 width = len;
953
954 alloc = width;
955 if (sign != 0 && len == width)
956 alloc++;
957 /* 2: size preallocated for %s */
958 if (alloc > 2) {
959 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
960 if (res == NULL)
961 goto error;
962 }
963 #ifndef NDEBUG
964 char *before = res;
965 #endif
966
967 /* Write the sign if needed */
968 if (sign) {
969 if (fill != ' ')
970 *res++ = sign;
971 if (width > len)
972 width--;
973 }
974
975 /* Write the numeric prefix for "x", "X" and "o" formats
976 if the alternate form is used.
977 For example, write "0x" for the "%#x" format. */
978 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
979 assert(pbuf[0] == '0');
980 assert(pbuf[1] == c);
981 if (fill != ' ') {
982 *res++ = *pbuf++;
983 *res++ = *pbuf++;
984 }
985 width -= 2;
986 if (width < 0)
987 width = 0;
988 len -= 2;
989 }
990
991 /* Pad left with the fill character if needed */
992 if (width > len && !(flags & F_LJUST)) {
993 memset(res, fill, width - len);
994 res += (width - len);
995 width = len;
996 }
997
998 /* If padding with spaces: write sign if needed and/or numeric
999 prefix if the alternate form is used */
1000 if (fill == ' ') {
1001 if (sign)
1002 *res++ = sign;
1003 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1004 assert(pbuf[0] == '0');
1005 assert(pbuf[1] == c);
1006 *res++ = *pbuf++;
1007 *res++ = *pbuf++;
1008 }
1009 }
1010
1011 /* Copy bytes */
1012 memcpy(res, pbuf, len);
1013 res += len;
1014
1015 /* Pad right with the fill character if needed */
1016 if (width > len) {
1017 memset(res, ' ', width - len);
1018 res += (width - len);
1019 }
1020
1021 if (dict && (argidx < arglen)) {
1022 PyErr_SetString(PyExc_TypeError,
1023 "not all arguments converted during bytes formatting");
1024 Py_XDECREF(temp);
1025 goto error;
1026 }
1027 Py_XDECREF(temp);
1028
1029 #ifndef NDEBUG
1030 /* check that we computed the exact size for this write */
1031 assert((res - before) == alloc);
1032 #endif
1033 } /* '%' */
1034
1035 /* If overallocation was disabled, ensure that it was the last
1036 write. Otherwise, we missed an optimization */
1037 assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
1038 } /* until end */
1039
1040 if (argidx < arglen && !dict) {
1041 PyErr_SetString(PyExc_TypeError,
1042 "not all arguments converted during bytes formatting");
1043 goto error;
1044 }
1045
1046 if (args_owned) {
1047 Py_DECREF(args);
1048 }
1049 return _PyBytesWriter_Finish(&writer, res);
1050
1051 error:
1052 _PyBytesWriter_Dealloc(&writer);
1053 if (args_owned) {
1054 Py_DECREF(args);
1055 }
1056 return NULL;
1057 }
1058
/* Unescape a backslash-escaped string.

   s, len               -- input bytes and their length.
   errors               -- handling of a malformed \x escape: NULL or
                           "strict" raises ValueError, "replace" emits '?',
                           "ignore" emits nothing; any other value raises
                           ValueError.
   first_invalid_escape -- out-parameter.  Set to NULL once the output
                           buffer has been allocated, then pointed at the
                           first unrecognized escape character or at the
                           first octal escape whose value exceeds 0377.
                           It is not written if the initial allocation
                           fails.

   Returns the object produced by _PyBytesWriter_Finish(), or NULL with an
   exception set. */
PyObject *_PyBytes_DecodeEscape(const char *s,
                                Py_ssize_t len,
                                const char *errors,
                                const char **first_invalid_escape)
{
    int c;
    char *p;
    const char *end;
    _PyBytesWriter writer;

    _PyBytesWriter_Init(&writer);

    p = _PyBytesWriter_Alloc(&writer, len);
    if (p == NULL)
        return NULL;
    writer.overallocate = 1;

    *first_invalid_escape = NULL;

    end = s + len;
    while (s < end) {
        /* Ordinary byte: copy through unchanged */
        if (*s != '\\') {
            *p++ = *s++;
            continue;
        }

        /* Skip the backslash; it must be followed by something */
        s++;
        if (s == end) {
            PyErr_SetString(PyExc_ValueError,
                            "Trailing \\ in string");
            goto failed;
        }

        /* s is advanced past the escape character before the case body
           runs, hence the s[-1] / s-1 references below */
        switch (*s++) {
        /* XXX This assumes ASCII! */
        case '\n': break;   /* backslash-newline produces no output */
        case '\\': *p++ = '\\'; break;
        case '\'': *p++ = '\''; break;
        case '\"': *p++ = '\"'; break;
        case 'b': *p++ = '\b'; break;
        case 'f': *p++ = '\014'; break; /* FF */
        case 't': *p++ = '\t'; break;
        case 'n': *p++ = '\n'; break;
        case 'r': *p++ = '\r'; break;
        case 'v': *p++ = '\013'; break; /* VT */
        case 'a': *p++ = '\007'; break; /* BEL, not classic C */
        case '0': case '1': case '2': case '3':
        case '4': case '5': case '6': case '7':
            /* Octal escape: one to three digits */
            c = s[-1] - '0';
            if (s < end && '0' <= *s && *s <= '7') {
                c = (c<<3) + *s++ - '0';
                if (s < end && '0' <= *s && *s <= '7')
                    c = (c<<3) + *s++ - '0';
            }
            /* A value above 0377 does not fit in a byte: record it as the
               first invalid escape; the value is still stored below */
            if (c > 0377) {
                if (*first_invalid_escape == NULL) {
                    *first_invalid_escape = s-3; /* Back up 3 chars, since we've
                                                    already incremented s. */
                }
            }
            *p++ = c;
            break;
        case 'x':
            /* Hex escape: exactly two hex digits are required */
            if (s+1 < end) {
                int digit1, digit2;
                digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
                digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
                if (digit1 < 16 && digit2 < 16) {
                    *p++ = (unsigned char)((digit1 << 4) + digit2);
                    s += 2;
                    break;
                }
            }
            /* invalid hexadecimal digits */

            if (!errors || strcmp(errors, "strict") == 0) {
                /* end - len is the start of the input, so this is the
                   offset of the backslash */
                PyErr_Format(PyExc_ValueError,
                             "invalid \\x escape at position %zd",
                             s - 2 - (end - len));
                goto failed;
            }
            if (strcmp(errors, "replace") == 0) {
                *p++ = '?';
            } else if (strcmp(errors, "ignore") == 0)
                /* do nothing */;
            else {
                PyErr_Format(PyExc_ValueError,
                             "decoding error; unknown "
                             "error handling code: %.400s",
                             errors);
                goto failed;
            }
            /* skip \x */
            if (s < end && Py_ISXDIGIT(s[0]))
                s++; /* and a hexdigit */
            break;

        default:
            /* Unknown escape: record it, keep the backslash, and step back
               so the escape character itself is copied by the next
               iteration */
            if (*first_invalid_escape == NULL) {
                *first_invalid_escape = s-1; /* Back up one char, since we've
                                                already incremented s. */
            }
            *p++ = '\\';
            s--;
        }
    }

    return _PyBytesWriter_Finish(&writer, p);

  failed:
    _PyBytesWriter_Dealloc(&writer);
    return NULL;
}
1173
PyBytes_DecodeEscape(const char * s,Py_ssize_t len,const char * errors,Py_ssize_t Py_UNUSED (unicode),const char * Py_UNUSED (recode_encoding))1174 PyObject *PyBytes_DecodeEscape(const char *s,
1175 Py_ssize_t len,
1176 const char *errors,
1177 Py_ssize_t Py_UNUSED(unicode),
1178 const char *Py_UNUSED(recode_encoding))
1179 {
1180 const char* first_invalid_escape;
1181 PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
1182 &first_invalid_escape);
1183 if (result == NULL)
1184 return NULL;
1185 if (first_invalid_escape != NULL) {
1186 unsigned char c = *first_invalid_escape;
1187 if ('4' <= c && c <= '7') {
1188 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1189 "invalid octal escape sequence '\\%.3s'",
1190 first_invalid_escape) < 0)
1191 {
1192 Py_DECREF(result);
1193 return NULL;
1194 }
1195 }
1196 else {
1197 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1198 "invalid escape sequence '\\%c'",
1199 c) < 0)
1200 {
1201 Py_DECREF(result);
1202 return NULL;
1203 }
1204 }
1205 }
1206 return result;
1207
1208 }
1209 /* -------------------------------------------------------------------- */
1210 /* object api */
1211
1212 Py_ssize_t
PyBytes_Size(PyObject * op)1213 PyBytes_Size(PyObject *op)
1214 {
1215 if (!PyBytes_Check(op)) {
1216 PyErr_Format(PyExc_TypeError,
1217 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1218 return -1;
1219 }
1220 return Py_SIZE(op);
1221 }
1222
1223 char *
PyBytes_AsString(PyObject * op)1224 PyBytes_AsString(PyObject *op)
1225 {
1226 if (!PyBytes_Check(op)) {
1227 PyErr_Format(PyExc_TypeError,
1228 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1229 return NULL;
1230 }
1231 return ((PyBytesObject *)op)->ob_sval;
1232 }
1233
1234 int
PyBytes_AsStringAndSize(PyObject * obj,char ** s,Py_ssize_t * len)1235 PyBytes_AsStringAndSize(PyObject *obj,
1236 char **s,
1237 Py_ssize_t *len)
1238 {
1239 if (s == NULL) {
1240 PyErr_BadInternalCall();
1241 return -1;
1242 }
1243
1244 if (!PyBytes_Check(obj)) {
1245 PyErr_Format(PyExc_TypeError,
1246 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1247 return -1;
1248 }
1249
1250 *s = PyBytes_AS_STRING(obj);
1251 if (len != NULL)
1252 *len = PyBytes_GET_SIZE(obj);
1253 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1254 PyErr_SetString(PyExc_ValueError,
1255 "embedded null byte");
1256 return -1;
1257 }
1258 return 0;
1259 }
1260
1261 /* -------------------------------------------------------------------- */
1262 /* Methods */
1263
1264 #define STRINGLIB_GET_EMPTY() bytes_get_empty()
1265
1266 #include "stringlib/stringdefs.h"
1267 #define STRINGLIB_MUTABLE 0
1268
1269 #include "stringlib/fastsearch.h"
1270 #include "stringlib/count.h"
1271 #include "stringlib/find.h"
1272 #include "stringlib/join.h"
1273 #include "stringlib/partition.h"
1274 #include "stringlib/split.h"
1275 #include "stringlib/ctype.h"
1276
1277 #include "stringlib/transmogrify.h"
1278
1279 #undef STRINGLIB_GET_EMPTY
1280
1281 Py_ssize_t
_PyBytes_Find(const char * haystack,Py_ssize_t len_haystack,const char * needle,Py_ssize_t len_needle,Py_ssize_t offset)1282 _PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
1283 const char *needle, Py_ssize_t len_needle,
1284 Py_ssize_t offset)
1285 {
1286 return stringlib_find(haystack, len_haystack,
1287 needle, len_needle, offset);
1288 }
1289
1290 Py_ssize_t
_PyBytes_ReverseFind(const char * haystack,Py_ssize_t len_haystack,const char * needle,Py_ssize_t len_needle,Py_ssize_t offset)1291 _PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
1292 const char *needle, Py_ssize_t len_needle,
1293 Py_ssize_t offset)
1294 {
1295 return stringlib_rfind(haystack, len_haystack,
1296 needle, len_needle, offset);
1297 }
1298
1299 PyObject *
PyBytes_Repr(PyObject * obj,int smartquotes)1300 PyBytes_Repr(PyObject *obj, int smartquotes)
1301 {
1302 PyBytesObject* op = (PyBytesObject*) obj;
1303 Py_ssize_t i, length = Py_SIZE(op);
1304 Py_ssize_t newsize, squotes, dquotes;
1305 PyObject *v;
1306 unsigned char quote;
1307 const unsigned char *s;
1308 Py_UCS1 *p;
1309
1310 /* Compute size of output string */
1311 squotes = dquotes = 0;
1312 newsize = 3; /* b'' */
1313 s = (const unsigned char*)op->ob_sval;
1314 for (i = 0; i < length; i++) {
1315 Py_ssize_t incr = 1;
1316 switch(s[i]) {
1317 case '\'': squotes++; break;
1318 case '"': dquotes++; break;
1319 case '\\': case '\t': case '\n': case '\r':
1320 incr = 2; break; /* \C */
1321 default:
1322 if (s[i] < ' ' || s[i] >= 0x7f)
1323 incr = 4; /* \xHH */
1324 }
1325 if (newsize > PY_SSIZE_T_MAX - incr)
1326 goto overflow;
1327 newsize += incr;
1328 }
1329 quote = '\'';
1330 if (smartquotes && squotes && !dquotes)
1331 quote = '"';
1332 if (squotes && quote == '\'') {
1333 if (newsize > PY_SSIZE_T_MAX - squotes)
1334 goto overflow;
1335 newsize += squotes;
1336 }
1337
1338 v = PyUnicode_New(newsize, 127);
1339 if (v == NULL) {
1340 return NULL;
1341 }
1342 p = PyUnicode_1BYTE_DATA(v);
1343
1344 *p++ = 'b', *p++ = quote;
1345 for (i = 0; i < length; i++) {
1346 unsigned char c = op->ob_sval[i];
1347 if (c == quote || c == '\\')
1348 *p++ = '\\', *p++ = c;
1349 else if (c == '\t')
1350 *p++ = '\\', *p++ = 't';
1351 else if (c == '\n')
1352 *p++ = '\\', *p++ = 'n';
1353 else if (c == '\r')
1354 *p++ = '\\', *p++ = 'r';
1355 else if (c < ' ' || c >= 0x7f) {
1356 *p++ = '\\';
1357 *p++ = 'x';
1358 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1359 *p++ = Py_hexdigits[c & 0xf];
1360 }
1361 else
1362 *p++ = c;
1363 }
1364 *p++ = quote;
1365 assert(_PyUnicode_CheckConsistency(v, 1));
1366 return v;
1367
1368 overflow:
1369 PyErr_SetString(PyExc_OverflowError,
1370 "bytes object is too large to make repr");
1371 return NULL;
1372 }
1373
1374 static PyObject *
bytes_repr(PyObject * op)1375 bytes_repr(PyObject *op)
1376 {
1377 return PyBytes_Repr(op, 1);
1378 }
1379
1380 static PyObject *
bytes_str(PyObject * op)1381 bytes_str(PyObject *op)
1382 {
1383 if (_Py_GetConfig()->bytes_warning) {
1384 if (PyErr_WarnEx(PyExc_BytesWarning,
1385 "str() on a bytes instance", 1)) {
1386 return NULL;
1387 }
1388 }
1389 return bytes_repr(op);
1390 }
1391
1392 static Py_ssize_t
bytes_length(PyBytesObject * a)1393 bytes_length(PyBytesObject *a)
1394 {
1395 return Py_SIZE(a);
1396 }
1397
1398 /* This is also used by PyBytes_Concat() */
1399 static PyObject *
bytes_concat(PyObject * a,PyObject * b)1400 bytes_concat(PyObject *a, PyObject *b)
1401 {
1402 Py_buffer va, vb;
1403 PyObject *result = NULL;
1404
1405 va.len = -1;
1406 vb.len = -1;
1407 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1408 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
1409 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1410 Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
1411 goto done;
1412 }
1413
1414 /* Optimize end cases */
1415 if (va.len == 0 && PyBytes_CheckExact(b)) {
1416 result = b;
1417 Py_INCREF(result);
1418 goto done;
1419 }
1420 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1421 result = a;
1422 Py_INCREF(result);
1423 goto done;
1424 }
1425
1426 if (va.len > PY_SSIZE_T_MAX - vb.len) {
1427 PyErr_NoMemory();
1428 goto done;
1429 }
1430
1431 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
1432 if (result != NULL) {
1433 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1434 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1435 }
1436
1437 done:
1438 if (va.len != -1)
1439 PyBuffer_Release(&va);
1440 if (vb.len != -1)
1441 PyBuffer_Release(&vb);
1442 return result;
1443 }
1444
1445 static PyObject *
bytes_repeat(PyBytesObject * a,Py_ssize_t n)1446 bytes_repeat(PyBytesObject *a, Py_ssize_t n)
1447 {
1448 Py_ssize_t size;
1449 PyBytesObject *op;
1450 size_t nbytes;
1451 if (n < 0)
1452 n = 0;
1453 /* watch out for overflows: the size can overflow int,
1454 * and the # of bytes needed can overflow size_t
1455 */
1456 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1457 PyErr_SetString(PyExc_OverflowError,
1458 "repeated bytes are too long");
1459 return NULL;
1460 }
1461 size = Py_SIZE(a) * n;
1462 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1463 Py_INCREF(a);
1464 return (PyObject *)a;
1465 }
1466 nbytes = (size_t)size;
1467 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1468 PyErr_SetString(PyExc_OverflowError,
1469 "repeated bytes are too long");
1470 return NULL;
1471 }
1472 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + nbytes);
1473 if (op == NULL) {
1474 return PyErr_NoMemory();
1475 }
1476 _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
1477 _Py_COMP_DIAG_PUSH
1478 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
1479 op->ob_shash = -1;
1480 _Py_COMP_DIAG_POP
1481 op->ob_sval[size] = '\0';
1482
1483 _PyBytes_Repeat(op->ob_sval, size, a->ob_sval, Py_SIZE(a));
1484
1485 return (PyObject *) op;
1486 }
1487
1488 static int
bytes_contains(PyObject * self,PyObject * arg)1489 bytes_contains(PyObject *self, PyObject *arg)
1490 {
1491 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1492 }
1493
1494 static PyObject *
bytes_item(PyBytesObject * a,Py_ssize_t i)1495 bytes_item(PyBytesObject *a, Py_ssize_t i)
1496 {
1497 if (i < 0 || i >= Py_SIZE(a)) {
1498 PyErr_SetString(PyExc_IndexError, "index out of range");
1499 return NULL;
1500 }
1501 return _PyLong_FromUnsignedChar((unsigned char)a->ob_sval[i]);
1502 }
1503
1504 static int
bytes_compare_eq(PyBytesObject * a,PyBytesObject * b)1505 bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1506 {
1507 int cmp;
1508 Py_ssize_t len;
1509
1510 len = Py_SIZE(a);
1511 if (Py_SIZE(b) != len)
1512 return 0;
1513
1514 if (a->ob_sval[0] != b->ob_sval[0])
1515 return 0;
1516
1517 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1518 return (cmp == 0);
1519 }
1520
1521 static PyObject*
bytes_richcompare(PyBytesObject * a,PyBytesObject * b,int op)1522 bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
1523 {
1524 int c;
1525 Py_ssize_t len_a, len_b;
1526 Py_ssize_t min_len;
1527
1528 /* Make sure both arguments are strings. */
1529 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
1530 if (_Py_GetConfig()->bytes_warning && (op == Py_EQ || op == Py_NE)) {
1531 if (PyUnicode_Check(a) || PyUnicode_Check(b)) {
1532 if (PyErr_WarnEx(PyExc_BytesWarning,
1533 "Comparison between bytes and string", 1))
1534 return NULL;
1535 }
1536 if (PyLong_Check(a) || PyLong_Check(b)) {
1537 if (PyErr_WarnEx(PyExc_BytesWarning,
1538 "Comparison between bytes and int", 1))
1539 return NULL;
1540 }
1541 }
1542 Py_RETURN_NOTIMPLEMENTED;
1543 }
1544 else if (a == b) {
1545 switch (op) {
1546 case Py_EQ:
1547 case Py_LE:
1548 case Py_GE:
1549 /* a byte string is equal to itself */
1550 Py_RETURN_TRUE;
1551 case Py_NE:
1552 case Py_LT:
1553 case Py_GT:
1554 Py_RETURN_FALSE;
1555 default:
1556 PyErr_BadArgument();
1557 return NULL;
1558 }
1559 }
1560 else if (op == Py_EQ || op == Py_NE) {
1561 int eq = bytes_compare_eq(a, b);
1562 eq ^= (op == Py_NE);
1563 return PyBool_FromLong(eq);
1564 }
1565 else {
1566 len_a = Py_SIZE(a);
1567 len_b = Py_SIZE(b);
1568 min_len = Py_MIN(len_a, len_b);
1569 if (min_len > 0) {
1570 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1571 if (c == 0)
1572 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1573 }
1574 else
1575 c = 0;
1576 if (c != 0)
1577 Py_RETURN_RICHCOMPARE(c, 0, op);
1578 Py_RETURN_RICHCOMPARE(len_a, len_b, op);
1579 }
1580 }
1581
1582 static Py_hash_t
bytes_hash(PyBytesObject * a)1583 bytes_hash(PyBytesObject *a)
1584 {
1585 _Py_COMP_DIAG_PUSH
1586 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
1587 if (a->ob_shash == -1) {
1588 /* Can't fail */
1589 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
1590 }
1591 return a->ob_shash;
1592 _Py_COMP_DIAG_POP
1593 }
1594
1595 static PyObject*
bytes_subscript(PyBytesObject * self,PyObject * item)1596 bytes_subscript(PyBytesObject* self, PyObject* item)
1597 {
1598 if (_PyIndex_Check(item)) {
1599 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1600 if (i == -1 && PyErr_Occurred())
1601 return NULL;
1602 if (i < 0)
1603 i += PyBytes_GET_SIZE(self);
1604 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1605 PyErr_SetString(PyExc_IndexError,
1606 "index out of range");
1607 return NULL;
1608 }
1609 return _PyLong_FromUnsignedChar((unsigned char)self->ob_sval[i]);
1610 }
1611 else if (PySlice_Check(item)) {
1612 Py_ssize_t start, stop, step, slicelength, i;
1613 size_t cur;
1614 const char* source_buf;
1615 char* result_buf;
1616 PyObject* result;
1617
1618 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1619 return NULL;
1620 }
1621 slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1622 &stop, step);
1623
1624 if (slicelength <= 0) {
1625 return PyBytes_FromStringAndSize("", 0);
1626 }
1627 else if (start == 0 && step == 1 &&
1628 slicelength == PyBytes_GET_SIZE(self) &&
1629 PyBytes_CheckExact(self)) {
1630 Py_INCREF(self);
1631 return (PyObject *)self;
1632 }
1633 else if (step == 1) {
1634 return PyBytes_FromStringAndSize(
1635 PyBytes_AS_STRING(self) + start,
1636 slicelength);
1637 }
1638 else {
1639 source_buf = PyBytes_AS_STRING(self);
1640 result = PyBytes_FromStringAndSize(NULL, slicelength);
1641 if (result == NULL)
1642 return NULL;
1643
1644 result_buf = PyBytes_AS_STRING(result);
1645 for (cur = start, i = 0; i < slicelength;
1646 cur += step, i++) {
1647 result_buf[i] = source_buf[cur];
1648 }
1649
1650 return result;
1651 }
1652 }
1653 else {
1654 PyErr_Format(PyExc_TypeError,
1655 "byte indices must be integers or slices, not %.200s",
1656 Py_TYPE(item)->tp_name);
1657 return NULL;
1658 }
1659 }
1660
1661 static int
bytes_buffer_getbuffer(PyBytesObject * self,Py_buffer * view,int flags)1662 bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
1663 {
1664 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1665 1, flags);
1666 }
1667
1668 static PySequenceMethods bytes_as_sequence = {
1669 (lenfunc)bytes_length, /*sq_length*/
1670 (binaryfunc)bytes_concat, /*sq_concat*/
1671 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1672 (ssizeargfunc)bytes_item, /*sq_item*/
1673 0, /*sq_slice*/
1674 0, /*sq_ass_item*/
1675 0, /*sq_ass_slice*/
1676 (objobjproc)bytes_contains /*sq_contains*/
1677 };
1678
1679 static PyMappingMethods bytes_as_mapping = {
1680 (lenfunc)bytes_length,
1681 (binaryfunc)bytes_subscript,
1682 0,
1683 };
1684
1685 static PyBufferProcs bytes_as_buffer = {
1686 (getbufferproc)bytes_buffer_getbuffer,
1687 NULL,
1688 };
1689
1690
1691 /*[clinic input]
1692 bytes.__bytes__
1693 Convert this value to exact type bytes.
1694 [clinic start generated code]*/
1695
1696 static PyObject *
bytes___bytes___impl(PyBytesObject * self)1697 bytes___bytes___impl(PyBytesObject *self)
1698 /*[clinic end generated code: output=63a306a9bc0caac5 input=34ec5ddba98bd6bb]*/
1699 {
1700 if (PyBytes_CheckExact(self)) {
1701 Py_INCREF(self);
1702 return (PyObject *)self;
1703 }
1704 else {
1705 return PyBytes_FromStringAndSize(self->ob_sval, Py_SIZE(self));
1706 }
1707 }
1708
1709
/* Values for the `striptype` argument of do_strip(), do_xstrip() and
   do_argstrip(). */
#define LEFTSTRIP 0     /* strip the leading end only */
#define RIGHTSTRIP 1    /* strip the trailing end only */
#define BOTHSTRIP 2     /* strip both ends */
1713
1714 /*[clinic input]
1715 bytes.split
1716
1717 sep: object = None
        The delimiter according to which to split the bytes.
1719 None (the default value) means split on ASCII whitespace characters
1720 (space, tab, return, newline, formfeed, vertical tab).
1721 maxsplit: Py_ssize_t = -1
1722 Maximum number of splits to do.
1723 -1 (the default value) means no limit.
1724
1725 Return a list of the sections in the bytes, using sep as the delimiter.
1726 [clinic start generated code]*/
1727
1728 static PyObject *
bytes_split_impl(PyBytesObject * self,PyObject * sep,Py_ssize_t maxsplit)1729 bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1730 /*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
1731 {
1732 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1733 const char *s = PyBytes_AS_STRING(self), *sub;
1734 Py_buffer vsub;
1735 PyObject *list;
1736
1737 if (maxsplit < 0)
1738 maxsplit = PY_SSIZE_T_MAX;
1739 if (sep == Py_None)
1740 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1741 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1742 return NULL;
1743 sub = vsub.buf;
1744 n = vsub.len;
1745
1746 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1747 PyBuffer_Release(&vsub);
1748 return list;
1749 }
1750
1751 /*[clinic input]
1752 bytes.partition
1753
1754 sep: Py_buffer
1755 /
1756
1757 Partition the bytes into three parts using the given separator.
1758
1759 This will search for the separator sep in the bytes. If the separator is found,
1760 returns a 3-tuple containing the part before the separator, the separator
1761 itself, and the part after it.
1762
1763 If the separator is not found, returns a 3-tuple containing the original bytes
1764 object and two empty bytes objects.
1765 [clinic start generated code]*/
1766
1767 static PyObject *
bytes_partition_impl(PyBytesObject * self,Py_buffer * sep)1768 bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1769 /*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
1770 {
1771 return stringlib_partition(
1772 (PyObject*) self,
1773 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1774 sep->obj, (const char *)sep->buf, sep->len
1775 );
1776 }
1777
1778 /*[clinic input]
1779 bytes.rpartition
1780
1781 sep: Py_buffer
1782 /
1783
1784 Partition the bytes into three parts using the given separator.
1785
1786 This will search for the separator sep in the bytes, starting at the end. If
1787 the separator is found, returns a 3-tuple containing the part before the
1788 separator, the separator itself, and the part after it.
1789
1790 If the separator is not found, returns a 3-tuple containing two empty bytes
1791 objects and the original bytes object.
1792 [clinic start generated code]*/
1793
1794 static PyObject *
bytes_rpartition_impl(PyBytesObject * self,Py_buffer * sep)1795 bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1796 /*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
1797 {
1798 return stringlib_rpartition(
1799 (PyObject*) self,
1800 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1801 sep->obj, (const char *)sep->buf, sep->len
1802 );
1803 }
1804
1805 /*[clinic input]
1806 bytes.rsplit = bytes.split
1807
1808 Return a list of the sections in the bytes, using sep as the delimiter.
1809
1810 Splitting is done starting at the end of the bytes and working to the front.
1811 [clinic start generated code]*/
1812
1813 static PyObject *
bytes_rsplit_impl(PyBytesObject * self,PyObject * sep,Py_ssize_t maxsplit)1814 bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1815 /*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
1816 {
1817 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1818 const char *s = PyBytes_AS_STRING(self), *sub;
1819 Py_buffer vsub;
1820 PyObject *list;
1821
1822 if (maxsplit < 0)
1823 maxsplit = PY_SSIZE_T_MAX;
1824 if (sep == Py_None)
1825 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1826 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1827 return NULL;
1828 sub = vsub.buf;
1829 n = vsub.len;
1830
1831 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1832 PyBuffer_Release(&vsub);
1833 return list;
1834 }
1835
1836
1837 /*[clinic input]
1838 bytes.join
1839
1840 iterable_of_bytes: object
1841 /
1842
1843 Concatenate any number of bytes objects.
1844
1845 The bytes whose method is called is inserted in between each pair.
1846
1847 The result is returned as a new bytes object.
1848
1849 Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1850 [clinic start generated code]*/
1851
1852 static PyObject *
bytes_join(PyBytesObject * self,PyObject * iterable_of_bytes)1853 bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1854 /*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
1855 {
1856 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
1857 }
1858
1859 PyObject *
_PyBytes_Join(PyObject * sep,PyObject * x)1860 _PyBytes_Join(PyObject *sep, PyObject *x)
1861 {
1862 assert(sep != NULL && PyBytes_Check(sep));
1863 assert(x != NULL);
1864 return bytes_join((PyBytesObject*)sep, x);
1865 }
1866
1867 static PyObject *
bytes_find(PyBytesObject * self,PyObject * args)1868 bytes_find(PyBytesObject *self, PyObject *args)
1869 {
1870 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1871 }
1872
1873 static PyObject *
bytes_index(PyBytesObject * self,PyObject * args)1874 bytes_index(PyBytesObject *self, PyObject *args)
1875 {
1876 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1877 }
1878
1879
1880 static PyObject *
bytes_rfind(PyBytesObject * self,PyObject * args)1881 bytes_rfind(PyBytesObject *self, PyObject *args)
1882 {
1883 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1884 }
1885
1886
1887 static PyObject *
bytes_rindex(PyBytesObject * self,PyObject * args)1888 bytes_rindex(PyBytesObject *self, PyObject *args)
1889 {
1890 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1891 }
1892
1893
1894 Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyBytesObject * self,int striptype,PyObject * sepobj)1895 do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
1896 {
1897 Py_buffer vsep;
1898 const char *s = PyBytes_AS_STRING(self);
1899 Py_ssize_t len = PyBytes_GET_SIZE(self);
1900 char *sep;
1901 Py_ssize_t seplen;
1902 Py_ssize_t i, j;
1903
1904 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
1905 return NULL;
1906 sep = vsep.buf;
1907 seplen = vsep.len;
1908
1909 i = 0;
1910 if (striptype != RIGHTSTRIP) {
1911 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1912 i++;
1913 }
1914 }
1915
1916 j = len;
1917 if (striptype != LEFTSTRIP) {
1918 do {
1919 j--;
1920 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1921 j++;
1922 }
1923
1924 PyBuffer_Release(&vsep);
1925
1926 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1927 Py_INCREF(self);
1928 return (PyObject*)self;
1929 }
1930 else
1931 return PyBytes_FromStringAndSize(s+i, j-i);
1932 }
1933
1934
1935 Py_LOCAL_INLINE(PyObject *)
do_strip(PyBytesObject * self,int striptype)1936 do_strip(PyBytesObject *self, int striptype)
1937 {
1938 const char *s = PyBytes_AS_STRING(self);
1939 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
1940
1941 i = 0;
1942 if (striptype != RIGHTSTRIP) {
1943 while (i < len && Py_ISSPACE(s[i])) {
1944 i++;
1945 }
1946 }
1947
1948 j = len;
1949 if (striptype != LEFTSTRIP) {
1950 do {
1951 j--;
1952 } while (j >= i && Py_ISSPACE(s[j]));
1953 j++;
1954 }
1955
1956 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1957 Py_INCREF(self);
1958 return (PyObject*)self;
1959 }
1960 else
1961 return PyBytes_FromStringAndSize(s+i, j-i);
1962 }
1963
1964
1965 Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyBytesObject * self,int striptype,PyObject * bytes)1966 do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
1967 {
1968 if (bytes != Py_None) {
1969 return do_xstrip(self, striptype, bytes);
1970 }
1971 return do_strip(self, striptype);
1972 }
1973
1974 /*[clinic input]
1975 bytes.strip
1976
1977 bytes: object = None
1978 /
1979
1980 Strip leading and trailing bytes contained in the argument.
1981
1982 If the argument is omitted or None, strip leading and trailing ASCII whitespace.
1983 [clinic start generated code]*/
1984
1985 static PyObject *
bytes_strip_impl(PyBytesObject * self,PyObject * bytes)1986 bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
1987 /*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
1988 {
1989 return do_argstrip(self, BOTHSTRIP, bytes);
1990 }
1991
1992 /*[clinic input]
1993 bytes.lstrip
1994
1995 bytes: object = None
1996 /
1997
1998 Strip leading bytes contained in the argument.
1999
2000 If the argument is omitted or None, strip leading ASCII whitespace.
2001 [clinic start generated code]*/
2002
2003 static PyObject *
bytes_lstrip_impl(PyBytesObject * self,PyObject * bytes)2004 bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
2005 /*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
2006 {
2007 return do_argstrip(self, LEFTSTRIP, bytes);
2008 }
2009
2010 /*[clinic input]
2011 bytes.rstrip
2012
2013 bytes: object = None
2014 /
2015
2016 Strip trailing bytes contained in the argument.
2017
2018 If the argument is omitted or None, strip trailing ASCII whitespace.
2019 [clinic start generated code]*/
2020
2021 static PyObject *
bytes_rstrip_impl(PyBytesObject * self,PyObject * bytes)2022 bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
2023 /*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
2024 {
2025 return do_argstrip(self, RIGHTSTRIP, bytes);
2026 }
2027
2028
2029 static PyObject *
bytes_count(PyBytesObject * self,PyObject * args)2030 bytes_count(PyBytesObject *self, PyObject *args)
2031 {
2032 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2033 }
2034
2035
2036 /*[clinic input]
2037 bytes.translate
2038
2039 table: object
2040 Translation table, which must be a bytes object of length 256.
2041 /
2042 delete as deletechars: object(c_default="NULL") = b''
2043
2044 Return a copy with each character mapped by the given translation table.
2045
2046 All characters occurring in the optional argument delete are removed.
2047 The remaining characters are mapped through the given translation table.
2048 [clinic start generated code]*/
2049
static PyObject *
bytes_translate_impl(PyBytesObject *self, PyObject *table,
                     PyObject *deletechars)
/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
{
    /* Build a translated copy of self: bytes listed in deletechars are
       dropped and each remaining byte is mapped through table.

       table is a bytes object, None (no mapping), or any object exporting
       a simple buffer; its length must be 256 or ValueError is set.
       deletechars is NULL when the argument was omitted, otherwise a bytes
       object or buffer exporter.

       Returns a new reference (self itself when nothing changed and self
       is an exact bytes object), or NULL with an exception set. */
    const char *input;
    char *output;
    /* Both views start out with NULL members so that the
       PyBuffer_Release() calls below can be made whether or not a buffer
       was actually acquired.
       NOTE(review): relies on PyBuffer_Release() ignoring a view whose obj
       is NULL -- confirm against the buffer API documentation. */
    Py_buffer table_view = {NULL, NULL};
    Py_buffer del_table_view = {NULL, NULL};
    const char *table_chars;
    Py_ssize_t i, c, changed = 0;
    PyObject *input_obj = (PyObject*)self;
    const char *output_start, *del_table_chars=NULL;
    Py_ssize_t inlen, tablen, dellen = 0;
    PyObject *result;
    /* Byte -> replacement byte, or -1 for "delete this byte". */
    int trans_table[256];

    /* Resolve the translation table; table_chars == NULL means identity. */
    if (PyBytes_Check(table)) {
        table_chars = PyBytes_AS_STRING(table);
        tablen = PyBytes_GET_SIZE(table);
    }
    else if (table == Py_None) {
        table_chars = NULL;
        tablen = 256;
    }
    else {
        if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
            return NULL;
        table_chars = table_view.buf;
        tablen = table_view.len;
    }

    if (tablen != 256) {
        PyErr_SetString(PyExc_ValueError,
                        "translation table must be 256 characters long");
        PyBuffer_Release(&table_view);
        return NULL;
    }

    /* Resolve the set of bytes to delete. */
    if (deletechars != NULL) {
        if (PyBytes_Check(deletechars)) {
            del_table_chars = PyBytes_AS_STRING(deletechars);
            dellen = PyBytes_GET_SIZE(deletechars);
        }
        else {
            if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
                PyBuffer_Release(&table_view);
                return NULL;
            }
            del_table_chars = del_table_view.buf;
            dellen = del_table_view.len;
        }
    }
    else {
        del_table_chars = NULL;
        dellen = 0;
    }

    /* The result can never be longer than the input; it is allocated at
       full size and shrunk at the end if bytes were deleted. */
    inlen = PyBytes_GET_SIZE(input_obj);
    result = PyBytes_FromStringAndSize((char *)NULL, inlen);
    if (result == NULL) {
        PyBuffer_Release(&del_table_view);
        PyBuffer_Release(&table_view);
        return NULL;
    }
    output_start = output = PyBytes_AS_STRING(result);
    input = PyBytes_AS_STRING(input_obj);

    if (dellen == 0 && table_chars != NULL) {
        /* If no deletions are required, use faster code */
        for (i = inlen; --i >= 0; ) {
            c = Py_CHARMASK(*input++);
            /* Store the mapped byte and note whether it differs. */
            if (Py_CHARMASK((*output++ = table_chars[c])) != c)
                changed = 1;
        }
        /* Nothing changed: hand back the original object instead of the
           identical copy. */
        if (!changed && PyBytes_CheckExact(input_obj)) {
            Py_INCREF(input_obj);
            Py_DECREF(result);
            result = input_obj;
        }
        PyBuffer_Release(&del_table_view);
        PyBuffer_Release(&table_view);
        return result;
    }

    /* General path: build an int table so that -1 can mark deletions. */
    if (table_chars == NULL) {
        for (i = 0; i < 256; i++)
            trans_table[i] = Py_CHARMASK(i);
    } else {
        for (i = 0; i < 256; i++)
            trans_table[i] = Py_CHARMASK(table_chars[i]);
    }
    /* table_chars is not read past this point. */
    PyBuffer_Release(&table_view);

    for (i = 0; i < dellen; i++)
        trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
    /* del_table_chars is not read past this point. */
    PyBuffer_Release(&del_table_view);

    for (i = inlen; --i >= 0; ) {
        c = Py_CHARMASK(*input++);
        /* A byte is written only when it is not marked for deletion.
           `changed` is set when the byte was deleted or mapped to a
           different value; an unchanged byte skips it via `continue`. */
        if (trans_table[c] != -1)
            if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
                continue;
        changed = 1;
    }
    if (!changed && PyBytes_CheckExact(input_obj)) {
        Py_DECREF(result);
        Py_INCREF(input_obj);
        return input_obj;
    }
    /* Fix the size of the resulting byte string */
    /* NOTE(review): the return value of _PyBytes_Resize() is not checked;
       presumably it sets result to NULL on failure -- confirm. */
    if (inlen > 0)
        _PyBytes_Resize(&result, output - output_start);
    return result;
}
2165
2166
2167 /*[clinic input]
2168
2169 @staticmethod
2170 bytes.maketrans
2171
2172 frm: Py_buffer
2173 to: Py_buffer
2174 /
2175
2176 Return a translation table useable for the bytes or bytearray translate method.
2177
2178 The returned table will be one where each byte in frm is mapped to the byte at
2179 the same position in to.
2180
2181 The bytes objects frm and to must be of the same length.
2182 [clinic start generated code]*/
2183
2184 static PyObject *
bytes_maketrans_impl(Py_buffer * frm,Py_buffer * to)2185 bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2186 /*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
2187 {
2188 return _Py_bytes_maketrans(frm, to);
2189 }
2190
2191
2192 /*[clinic input]
2193 bytes.replace
2194
2195 old: Py_buffer
2196 new: Py_buffer
2197 count: Py_ssize_t = -1
2198 Maximum number of occurrences to replace.
2199 -1 (the default value) means replace all occurrences.
2200 /
2201
2202 Return a copy with all occurrences of substring old replaced by new.
2203
2204 If the optional argument count is given, only the first count occurrences are
2205 replaced.
2206 [clinic start generated code]*/
2207
2208 static PyObject *
bytes_replace_impl(PyBytesObject * self,Py_buffer * old,Py_buffer * new,Py_ssize_t count)2209 bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
2210 Py_ssize_t count)
2211 /*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
2212 {
2213 return stringlib_replace((PyObject *)self,
2214 (const char *)old->buf, old->len,
2215 (const char *)new->buf, new->len, count);
2216 }
2217
2218 /** End DALKE **/
2219
2220 /*[clinic input]
2221 bytes.removeprefix as bytes_removeprefix
2222
2223 prefix: Py_buffer
2224 /
2225
2226 Return a bytes object with the given prefix string removed if present.
2227
2228 If the bytes starts with the prefix string, return bytes[len(prefix):].
2229 Otherwise, return a copy of the original bytes.
2230 [clinic start generated code]*/
2231
2232 static PyObject *
bytes_removeprefix_impl(PyBytesObject * self,Py_buffer * prefix)2233 bytes_removeprefix_impl(PyBytesObject *self, Py_buffer *prefix)
2234 /*[clinic end generated code: output=f006865331a06ab6 input=0c93bac817a8502c]*/
2235 {
2236 const char *self_start = PyBytes_AS_STRING(self);
2237 Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2238 const char *prefix_start = prefix->buf;
2239 Py_ssize_t prefix_len = prefix->len;
2240
2241 if (self_len >= prefix_len
2242 && prefix_len > 0
2243 && memcmp(self_start, prefix_start, prefix_len) == 0)
2244 {
2245 return PyBytes_FromStringAndSize(self_start + prefix_len,
2246 self_len - prefix_len);
2247 }
2248
2249 if (PyBytes_CheckExact(self)) {
2250 Py_INCREF(self);
2251 return (PyObject *)self;
2252 }
2253
2254 return PyBytes_FromStringAndSize(self_start, self_len);
2255 }
2256
2257 /*[clinic input]
2258 bytes.removesuffix as bytes_removesuffix
2259
2260 suffix: Py_buffer
2261 /
2262
2263 Return a bytes object with the given suffix string removed if present.
2264
2265 If the bytes ends with the suffix string and that suffix is not empty,
return bytes[:-len(suffix)]. Otherwise, return a copy of the original
2267 bytes.
2268 [clinic start generated code]*/
2269
2270 static PyObject *
bytes_removesuffix_impl(PyBytesObject * self,Py_buffer * suffix)2271 bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix)
2272 /*[clinic end generated code: output=d887d308e3242eeb input=9f4e1da8c637bbf1]*/
2273 {
2274 const char *self_start = PyBytes_AS_STRING(self);
2275 Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2276 const char *suffix_start = suffix->buf;
2277 Py_ssize_t suffix_len = suffix->len;
2278
2279 if (self_len >= suffix_len
2280 && suffix_len > 0
2281 && memcmp(self_start + self_len - suffix_len,
2282 suffix_start, suffix_len) == 0)
2283 {
2284 return PyBytes_FromStringAndSize(self_start,
2285 self_len - suffix_len);
2286 }
2287
2288 if (PyBytes_CheckExact(self)) {
2289 Py_INCREF(self);
2290 return (PyObject *)self;
2291 }
2292
2293 return PyBytes_FromStringAndSize(self_start, self_len);
2294 }
2295
2296 static PyObject *
bytes_startswith(PyBytesObject * self,PyObject * args)2297 bytes_startswith(PyBytesObject *self, PyObject *args)
2298 {
2299 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2300 }
2301
2302 static PyObject *
bytes_endswith(PyBytesObject * self,PyObject * args)2303 bytes_endswith(PyBytesObject *self, PyObject *args)
2304 {
2305 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2306 }
2307
2308
2309 /*[clinic input]
2310 bytes.decode
2311
2312 encoding: str(c_default="NULL") = 'utf-8'
2313 The encoding with which to decode the bytes.
2314 errors: str(c_default="NULL") = 'strict'
2315 The error handling scheme to use for the handling of decoding errors.
2316 The default is 'strict' meaning that decoding errors raise a
2317 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2318 as well as any other name registered with codecs.register_error that
2319 can handle UnicodeDecodeErrors.
2320
2321 Decode the bytes using the codec registered for encoding.
2322 [clinic start generated code]*/
2323
2324 static PyObject *
bytes_decode_impl(PyBytesObject * self,const char * encoding,const char * errors)2325 bytes_decode_impl(PyBytesObject *self, const char *encoding,
2326 const char *errors)
2327 /*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
2328 {
2329 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
2330 }
2331
2332
2333 /*[clinic input]
2334 bytes.splitlines
2335
2336 keepends: bool(accept={int}) = False
2337
2338 Return a list of the lines in the bytes, breaking at line boundaries.
2339
2340 Line breaks are not included in the resulting list unless keepends is given and
2341 true.
2342 [clinic start generated code]*/
2343
2344 static PyObject *
bytes_splitlines_impl(PyBytesObject * self,int keepends)2345 bytes_splitlines_impl(PyBytesObject *self, int keepends)
2346 /*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
2347 {
2348 return stringlib_splitlines(
2349 (PyObject*) self, PyBytes_AS_STRING(self),
2350 PyBytes_GET_SIZE(self), keepends
2351 );
2352 }
2353
2354 /*[clinic input]
2355 @classmethod
2356 bytes.fromhex
2357
2358 string: unicode
2359 /
2360
2361 Create a bytes object from a string of hexadecimal numbers.
2362
2363 Spaces between two numbers are accepted.
2364 Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2365 [clinic start generated code]*/
2366
2367 static PyObject *
bytes_fromhex_impl(PyTypeObject * type,PyObject * string)2368 bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2369 /*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
2370 {
2371 PyObject *result = _PyBytes_FromHex(string, 0);
2372 if (type != &PyBytes_Type && result != NULL) {
2373 Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
2374 }
2375 return result;
2376 }
2377
2378 PyObject*
_PyBytes_FromHex(PyObject * string,int use_bytearray)2379 _PyBytes_FromHex(PyObject *string, int use_bytearray)
2380 {
2381 char *buf;
2382 Py_ssize_t hexlen, invalid_char;
2383 unsigned int top, bot;
2384 const Py_UCS1 *str, *end;
2385 _PyBytesWriter writer;
2386
2387 _PyBytesWriter_Init(&writer);
2388 writer.use_bytearray = use_bytearray;
2389
2390 assert(PyUnicode_Check(string));
2391 if (PyUnicode_READY(string))
2392 return NULL;
2393 hexlen = PyUnicode_GET_LENGTH(string);
2394
2395 if (!PyUnicode_IS_ASCII(string)) {
2396 const void *data = PyUnicode_DATA(string);
2397 unsigned int kind = PyUnicode_KIND(string);
2398 Py_ssize_t i;
2399
2400 /* search for the first non-ASCII character */
2401 for (i = 0; i < hexlen; i++) {
2402 if (PyUnicode_READ(kind, data, i) >= 128)
2403 break;
2404 }
2405 invalid_char = i;
2406 goto error;
2407 }
2408
2409 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2410 str = PyUnicode_1BYTE_DATA(string);
2411
2412 /* This overestimates if there are spaces */
2413 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2414 if (buf == NULL)
2415 return NULL;
2416
2417 end = str + hexlen;
2418 while (str < end) {
2419 /* skip over spaces in the input */
2420 if (Py_ISSPACE(*str)) {
2421 do {
2422 str++;
2423 } while (Py_ISSPACE(*str));
2424 if (str >= end)
2425 break;
2426 }
2427
2428 top = _PyLong_DigitValue[*str];
2429 if (top >= 16) {
2430 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2431 goto error;
2432 }
2433 str++;
2434
2435 bot = _PyLong_DigitValue[*str];
2436 if (bot >= 16) {
2437 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2438 goto error;
2439 }
2440 str++;
2441
2442 *buf++ = (unsigned char)((top << 4) + bot);
2443 }
2444
2445 return _PyBytesWriter_Finish(&writer, buf);
2446
2447 error:
2448 PyErr_Format(PyExc_ValueError,
2449 "non-hexadecimal number found in "
2450 "fromhex() arg at position %zd", invalid_char);
2451 _PyBytesWriter_Dealloc(&writer);
2452 return NULL;
2453 }
2454
2455 /*[clinic input]
2456 bytes.hex
2457
2458 sep: object = NULL
2459 An optional single character or byte to separate hex bytes.
2460 bytes_per_sep: int = 1
2461 How many bytes between separators. Positive values count from the
2462 right, negative values count from the left.
2463
2464 Create a string of hexadecimal numbers from a bytes object.
2465
2466 Example:
2467 >>> value = b'\xb9\x01\xef'
2468 >>> value.hex()
2469 'b901ef'
2470 >>> value.hex(':')
2471 'b9:01:ef'
2472 >>> value.hex(':', 2)
2473 'b9:01ef'
2474 >>> value.hex(':', -2)
2475 'b901:ef'
2476 [clinic start generated code]*/
2477
2478 static PyObject *
bytes_hex_impl(PyBytesObject * self,PyObject * sep,int bytes_per_sep)2479 bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2480 /*[clinic end generated code: output=1f134da504064139 input=1a21282b1f1ae595]*/
2481 {
2482 const char *argbuf = PyBytes_AS_STRING(self);
2483 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2484 return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
2485 }
2486
2487 static PyObject *
bytes_getnewargs(PyBytesObject * v,PyObject * Py_UNUSED (ignored))2488 bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
2489 {
2490 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
2491 }
2492
2493
/* Method table of the bytes type (installed as tp_methods of PyBytes_Type).
   Entries are either explicit {name, function, flags, doc} initializers or
   *_METHODDEF macros defined elsewhere (presumably generated alongside the
   corresponding *_impl functions -- confirm in the clinic/stringlib headers).
   The table is terminated by the {NULL, NULL} sentinel. */
static PyMethodDef
bytes_methods[] = {
    {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
    BYTES___BYTES___METHODDEF
    {"capitalize", stringlib_capitalize, METH_NOARGS,
     _Py_capitalize__doc__},
    STRINGLIB_CENTER_METHODDEF
    {"count", (PyCFunction)bytes_count, METH_VARARGS,
     _Py_count__doc__},
    BYTES_DECODE_METHODDEF
    {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
     _Py_endswith__doc__},
    STRINGLIB_EXPANDTABS_METHODDEF
    {"find", (PyCFunction)bytes_find, METH_VARARGS,
     _Py_find__doc__},
    BYTES_FROMHEX_METHODDEF
    BYTES_HEX_METHODDEF
    {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
    {"isalnum", stringlib_isalnum, METH_NOARGS,
     _Py_isalnum__doc__},
    {"isalpha", stringlib_isalpha, METH_NOARGS,
     _Py_isalpha__doc__},
    {"isascii", stringlib_isascii, METH_NOARGS,
     _Py_isascii__doc__},
    {"isdigit", stringlib_isdigit, METH_NOARGS,
     _Py_isdigit__doc__},
    {"islower", stringlib_islower, METH_NOARGS,
     _Py_islower__doc__},
    {"isspace", stringlib_isspace, METH_NOARGS,
     _Py_isspace__doc__},
    {"istitle", stringlib_istitle, METH_NOARGS,
     _Py_istitle__doc__},
    {"isupper", stringlib_isupper, METH_NOARGS,
     _Py_isupper__doc__},
    BYTES_JOIN_METHODDEF
    STRINGLIB_LJUST_METHODDEF
    {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
    BYTES_LSTRIP_METHODDEF
    BYTES_MAKETRANS_METHODDEF
    BYTES_PARTITION_METHODDEF
    BYTES_REPLACE_METHODDEF
    BYTES_REMOVEPREFIX_METHODDEF
    BYTES_REMOVESUFFIX_METHODDEF
    {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
    {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
    STRINGLIB_RJUST_METHODDEF
    BYTES_RPARTITION_METHODDEF
    BYTES_RSPLIT_METHODDEF
    BYTES_RSTRIP_METHODDEF
    BYTES_SPLIT_METHODDEF
    BYTES_SPLITLINES_METHODDEF
    {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
     _Py_startswith__doc__},
    BYTES_STRIP_METHODDEF
    {"swapcase", stringlib_swapcase, METH_NOARGS,
     _Py_swapcase__doc__},
    {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
    BYTES_TRANSLATE_METHODDEF
    {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
    STRINGLIB_ZFILL_METHODDEF
    {NULL,     NULL}                         /* sentinel */
};
2556
2557 static PyObject *
bytes_mod(PyObject * self,PyObject * arg)2558 bytes_mod(PyObject *self, PyObject *arg)
2559 {
2560 if (!PyBytes_Check(self)) {
2561 Py_RETURN_NOTIMPLEMENTED;
2562 }
2563 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2564 arg, 0);
2565 }
2566
/* Number protocol table for bytes.  Only nb_remainder is populated, so the
   `%` operator is routed to bytes_mod(); the listed leading slots are 0 and
   the slots after nb_remainder are left to default (zero) initialization. */
static PyNumberMethods bytes_as_number = {
    0,              /*nb_add*/
    0,              /*nb_subtract*/
    0,              /*nb_multiply*/
    bytes_mod,      /*nb_remainder*/
};

/* Forward declaration: used by bytes_new_impl(), defined further below. */
static PyObject *
bytes_subtype_new(PyTypeObject *, PyObject *);
2576
2577 /*[clinic input]
2578 @classmethod
2579 bytes.__new__ as bytes_new
2580
2581 source as x: object = NULL
2582 encoding: str = NULL
2583 errors: str = NULL
2584
2585 [clinic start generated code]*/
2586
2587 static PyObject *
bytes_new_impl(PyTypeObject * type,PyObject * x,const char * encoding,const char * errors)2588 bytes_new_impl(PyTypeObject *type, PyObject *x, const char *encoding,
2589 const char *errors)
2590 /*[clinic end generated code: output=1e0c471be311a425 input=f0a966d19b7262b4]*/
2591 {
2592 PyObject *bytes;
2593 PyObject *func;
2594 Py_ssize_t size;
2595
2596 if (x == NULL) {
2597 if (encoding != NULL || errors != NULL) {
2598 PyErr_SetString(PyExc_TypeError,
2599 encoding != NULL ?
2600 "encoding without a string argument" :
2601 "errors without a string argument");
2602 return NULL;
2603 }
2604 bytes = PyBytes_FromStringAndSize(NULL, 0);
2605 }
2606 else if (encoding != NULL) {
2607 /* Encode via the codec registry */
2608 if (!PyUnicode_Check(x)) {
2609 PyErr_SetString(PyExc_TypeError,
2610 "encoding without a string argument");
2611 return NULL;
2612 }
2613 bytes = PyUnicode_AsEncodedString(x, encoding, errors);
2614 }
2615 else if (errors != NULL) {
2616 PyErr_SetString(PyExc_TypeError,
2617 PyUnicode_Check(x) ?
2618 "string argument without an encoding" :
2619 "errors without a string argument");
2620 return NULL;
2621 }
2622 /* We'd like to call PyObject_Bytes here, but we need to check for an
2623 integer argument before deferring to PyBytes_FromObject, something
2624 PyObject_Bytes doesn't do. */
2625 else if ((func = _PyObject_LookupSpecial(x, &_Py_ID(__bytes__))) != NULL) {
2626 bytes = _PyObject_CallNoArgs(func);
2627 Py_DECREF(func);
2628 if (bytes == NULL)
2629 return NULL;
2630 if (!PyBytes_Check(bytes)) {
2631 PyErr_Format(PyExc_TypeError,
2632 "__bytes__ returned non-bytes (type %.200s)",
2633 Py_TYPE(bytes)->tp_name);
2634 Py_DECREF(bytes);
2635 return NULL;
2636 }
2637 }
2638 else if (PyErr_Occurred())
2639 return NULL;
2640 else if (PyUnicode_Check(x)) {
2641 PyErr_SetString(PyExc_TypeError,
2642 "string argument without an encoding");
2643 return NULL;
2644 }
2645 /* Is it an integer? */
2646 else if (_PyIndex_Check(x)) {
2647 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2648 if (size == -1 && PyErr_Occurred()) {
2649 if (!PyErr_ExceptionMatches(PyExc_TypeError))
2650 return NULL;
2651 PyErr_Clear(); /* fall through */
2652 bytes = PyBytes_FromObject(x);
2653 }
2654 else {
2655 if (size < 0) {
2656 PyErr_SetString(PyExc_ValueError, "negative count");
2657 return NULL;
2658 }
2659 bytes = _PyBytes_FromSize(size, 1);
2660 }
2661 }
2662 else {
2663 bytes = PyBytes_FromObject(x);
2664 }
2665
2666 if (bytes != NULL && type != &PyBytes_Type) {
2667 Py_SETREF(bytes, bytes_subtype_new(type, bytes));
2668 }
2669
2670 return bytes;
2671 }
2672
2673 static PyObject*
_PyBytes_FromBuffer(PyObject * x)2674 _PyBytes_FromBuffer(PyObject *x)
2675 {
2676 PyObject *new;
2677 Py_buffer view;
2678
2679 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2680 return NULL;
2681
2682 new = PyBytes_FromStringAndSize(NULL, view.len);
2683 if (!new)
2684 goto fail;
2685 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2686 &view, view.len, 'C') < 0)
2687 goto fail;
2688 PyBuffer_Release(&view);
2689 return new;
2690
2691 fail:
2692 Py_XDECREF(new);
2693 PyBuffer_Release(&view);
2694 return NULL;
2695 }
2696
2697 static PyObject*
_PyBytes_FromList(PyObject * x)2698 _PyBytes_FromList(PyObject *x)
2699 {
2700 Py_ssize_t i, size = PyList_GET_SIZE(x);
2701 Py_ssize_t value;
2702 char *str;
2703 PyObject *item;
2704 _PyBytesWriter writer;
2705
2706 _PyBytesWriter_Init(&writer);
2707 str = _PyBytesWriter_Alloc(&writer, size);
2708 if (str == NULL)
2709 return NULL;
2710 writer.overallocate = 1;
2711 size = writer.allocated;
2712
2713 for (i = 0; i < PyList_GET_SIZE(x); i++) {
2714 item = PyList_GET_ITEM(x, i);
2715 Py_INCREF(item);
2716 value = PyNumber_AsSsize_t(item, NULL);
2717 Py_DECREF(item);
2718 if (value == -1 && PyErr_Occurred())
2719 goto error;
2720
2721 if (value < 0 || value >= 256) {
2722 PyErr_SetString(PyExc_ValueError,
2723 "bytes must be in range(0, 256)");
2724 goto error;
2725 }
2726
2727 if (i >= size) {
2728 str = _PyBytesWriter_Resize(&writer, str, size+1);
2729 if (str == NULL)
2730 return NULL;
2731 size = writer.allocated;
2732 }
2733 *str++ = (char) value;
2734 }
2735 return _PyBytesWriter_Finish(&writer, str);
2736
2737 error:
2738 _PyBytesWriter_Dealloc(&writer);
2739 return NULL;
2740 }
2741
2742 static PyObject*
_PyBytes_FromTuple(PyObject * x)2743 _PyBytes_FromTuple(PyObject *x)
2744 {
2745 PyObject *bytes;
2746 Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2747 Py_ssize_t value;
2748 char *str;
2749 PyObject *item;
2750
2751 bytes = PyBytes_FromStringAndSize(NULL, size);
2752 if (bytes == NULL)
2753 return NULL;
2754 str = ((PyBytesObject *)bytes)->ob_sval;
2755
2756 for (i = 0; i < size; i++) {
2757 item = PyTuple_GET_ITEM(x, i);
2758 value = PyNumber_AsSsize_t(item, NULL);
2759 if (value == -1 && PyErr_Occurred())
2760 goto error;
2761
2762 if (value < 0 || value >= 256) {
2763 PyErr_SetString(PyExc_ValueError,
2764 "bytes must be in range(0, 256)");
2765 goto error;
2766 }
2767 *str++ = (char) value;
2768 }
2769 return bytes;
2770
2771 error:
2772 Py_DECREF(bytes);
2773 return NULL;
2774 }
2775
2776 static PyObject *
_PyBytes_FromIterator(PyObject * it,PyObject * x)2777 _PyBytes_FromIterator(PyObject *it, PyObject *x)
2778 {
2779 char *str;
2780 Py_ssize_t i, size;
2781 _PyBytesWriter writer;
2782
2783 /* For iterator version, create a bytes object and resize as needed */
2784 size = PyObject_LengthHint(x, 64);
2785 if (size == -1 && PyErr_Occurred())
2786 return NULL;
2787
2788 _PyBytesWriter_Init(&writer);
2789 str = _PyBytesWriter_Alloc(&writer, size);
2790 if (str == NULL)
2791 return NULL;
2792 writer.overallocate = 1;
2793 size = writer.allocated;
2794
2795 /* Run the iterator to exhaustion */
2796 for (i = 0; ; i++) {
2797 PyObject *item;
2798 Py_ssize_t value;
2799
2800 /* Get the next item */
2801 item = PyIter_Next(it);
2802 if (item == NULL) {
2803 if (PyErr_Occurred())
2804 goto error;
2805 break;
2806 }
2807
2808 /* Interpret it as an int (__index__) */
2809 value = PyNumber_AsSsize_t(item, NULL);
2810 Py_DECREF(item);
2811 if (value == -1 && PyErr_Occurred())
2812 goto error;
2813
2814 /* Range check */
2815 if (value < 0 || value >= 256) {
2816 PyErr_SetString(PyExc_ValueError,
2817 "bytes must be in range(0, 256)");
2818 goto error;
2819 }
2820
2821 /* Append the byte */
2822 if (i >= size) {
2823 str = _PyBytesWriter_Resize(&writer, str, size+1);
2824 if (str == NULL)
2825 return NULL;
2826 size = writer.allocated;
2827 }
2828 *str++ = (char) value;
2829 }
2830
2831 return _PyBytesWriter_Finish(&writer, str);
2832
2833 error:
2834 _PyBytesWriter_Dealloc(&writer);
2835 return NULL;
2836 }
2837
2838 PyObject *
PyBytes_FromObject(PyObject * x)2839 PyBytes_FromObject(PyObject *x)
2840 {
2841 PyObject *it, *result;
2842
2843 if (x == NULL) {
2844 PyErr_BadInternalCall();
2845 return NULL;
2846 }
2847
2848 if (PyBytes_CheckExact(x)) {
2849 Py_INCREF(x);
2850 return x;
2851 }
2852
2853 /* Use the modern buffer interface */
2854 if (PyObject_CheckBuffer(x))
2855 return _PyBytes_FromBuffer(x);
2856
2857 if (PyList_CheckExact(x))
2858 return _PyBytes_FromList(x);
2859
2860 if (PyTuple_CheckExact(x))
2861 return _PyBytes_FromTuple(x);
2862
2863 if (!PyUnicode_Check(x)) {
2864 it = PyObject_GetIter(x);
2865 if (it != NULL) {
2866 result = _PyBytes_FromIterator(it, x);
2867 Py_DECREF(it);
2868 return result;
2869 }
2870 if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2871 return NULL;
2872 }
2873 }
2874
2875 PyErr_Format(PyExc_TypeError,
2876 "cannot convert '%.200s' object to bytes",
2877 Py_TYPE(x)->tp_name);
2878 return NULL;
2879 }
2880
/* This allocator is needed for subclasses that don't want to use __new__.
2882 * See https://github.com/python/cpython/issues/91020#issuecomment-1096793239
2883 *
2884 * This allocator will be removed when ob_shash is removed.
2885 */
2886 static PyObject *
bytes_alloc(PyTypeObject * self,Py_ssize_t nitems)2887 bytes_alloc(PyTypeObject *self, Py_ssize_t nitems)
2888 {
2889 PyBytesObject *obj = (PyBytesObject*)PyType_GenericAlloc(self, nitems);
2890 if (obj == NULL) {
2891 return NULL;
2892 }
2893 _Py_COMP_DIAG_PUSH
2894 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
2895 obj->ob_shash = -1;
2896 _Py_COMP_DIAG_POP
2897 return (PyObject*)obj;
2898 }
2899
2900 static PyObject *
bytes_subtype_new(PyTypeObject * type,PyObject * tmp)2901 bytes_subtype_new(PyTypeObject *type, PyObject *tmp)
2902 {
2903 PyObject *pnew;
2904 Py_ssize_t n;
2905
2906 assert(PyType_IsSubtype(type, &PyBytes_Type));
2907 assert(PyBytes_Check(tmp));
2908 n = PyBytes_GET_SIZE(tmp);
2909 pnew = type->tp_alloc(type, n);
2910 if (pnew != NULL) {
2911 memcpy(PyBytes_AS_STRING(pnew),
2912 PyBytes_AS_STRING(tmp), n+1);
2913 _Py_COMP_DIAG_PUSH
2914 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
2915 ((PyBytesObject *)pnew)->ob_shash =
2916 ((PyBytesObject *)tmp)->ob_shash;
2917 _Py_COMP_DIAG_POP
2918 }
2919 return pnew;
2920 }
2921
/* Docstring of the bytes type; installed as tp_doc of PyBytes_Type. */
PyDoc_STRVAR(bytes_doc,
"bytes(iterable_of_ints) -> bytes\n\
bytes(string, encoding[, errors]) -> bytes\n\
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
bytes() -> empty bytes object\n\
\n\
Construct an immutable array of bytes from:\n\
- an iterable yielding integers in range(256)\n\
- a text string encoded using the specified encoding\n\
- any object implementing the buffer API.\n\
- an integer");
2934
/* Forward declaration: bytes_iter() is defined after the iterator type
   further below but is needed for the tp_iter slot here. */
static PyObject *bytes_iter(PyObject *seq);

/* Type object of the built-in bytes type.  Slots are filled positionally;
   slots after tp_free are left to default (zero) initialization. */
PyTypeObject PyBytes_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes",                                    /* tp_name */
    PyBytesObject_SIZE,                         /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    0,                                          /* tp_dealloc */
    0,                                          /* tp_vectorcall_offset */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_as_async */
    (reprfunc)bytes_repr,                       /* tp_repr */
    &bytes_as_number,                           /* tp_as_number */
    &bytes_as_sequence,                         /* tp_as_sequence */
    &bytes_as_mapping,                          /* tp_as_mapping */
    (hashfunc)bytes_hash,                       /* tp_hash */
    0,                                          /* tp_call */
    bytes_str,                                  /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &bytes_as_buffer,                           /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_BYTES_SUBCLASS |
        _Py_TPFLAGS_MATCH_SELF,                 /* tp_flags */
    bytes_doc,                                  /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)bytes_richcompare,             /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    bytes_iter,                                 /* tp_iter */
    0,                                          /* tp_iternext */
    bytes_methods,                              /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    bytes_alloc,                                /* tp_alloc */
    bytes_new,                                  /* tp_new */
    PyObject_Del,                               /* tp_free */
};
2980
2981 void
PyBytes_Concat(PyObject ** pv,PyObject * w)2982 PyBytes_Concat(PyObject **pv, PyObject *w)
2983 {
2984 assert(pv != NULL);
2985 if (*pv == NULL)
2986 return;
2987 if (w == NULL) {
2988 Py_CLEAR(*pv);
2989 return;
2990 }
2991
2992 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2993 /* Only one reference, so we can resize in place */
2994 Py_ssize_t oldsize;
2995 Py_buffer wb;
2996
2997 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
2998 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2999 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3000 Py_CLEAR(*pv);
3001 return;
3002 }
3003
3004 oldsize = PyBytes_GET_SIZE(*pv);
3005 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3006 PyErr_NoMemory();
3007 goto error;
3008 }
3009 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3010 goto error;
3011
3012 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3013 PyBuffer_Release(&wb);
3014 return;
3015
3016 error:
3017 PyBuffer_Release(&wb);
3018 Py_CLEAR(*pv);
3019 return;
3020 }
3021
3022 else {
3023 /* Multiple references, need to create new object */
3024 PyObject *v;
3025 v = bytes_concat(*pv, w);
3026 Py_SETREF(*pv, v);
3027 }
3028 }
3029
3030 void
PyBytes_ConcatAndDel(PyObject ** pv,PyObject * w)3031 PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
3032 {
3033 PyBytes_Concat(pv, w);
3034 Py_XDECREF(w);
3035 }
3036
3037
3038 /* The following function breaks the notion that bytes are immutable:
3039 it changes the size of a bytes object. We get away with this only if there
3040 is only one module referencing the object. You can also think of it
3041 as creating a new bytes object and destroying the old one, only
3042 more efficiently. In any case, don't use this if the bytes object may
3043 already be known to some other part of the code...
3044 Note that if there's not enough memory to resize the bytes object, the
3045 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
3046 memory" exception is set, and -1 is returned. Else (on success) 0 is
3047 returned, and the value in *pv may or may not be the same as on input.
3048 As always, an extra byte is allocated for a trailing \0 byte (newsize
3049 does *not* include that), and a trailing \0 byte is stored.
3050 */
3051
3052 int
_PyBytes_Resize(PyObject ** pv,Py_ssize_t newsize)3053 _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3054 {
3055 PyObject *v;
3056 PyBytesObject *sv;
3057 v = *pv;
3058 if (!PyBytes_Check(v) || newsize < 0) {
3059 goto error;
3060 }
3061 if (Py_SIZE(v) == newsize) {
3062 /* return early if newsize equals to v->ob_size */
3063 return 0;
3064 }
3065 if (Py_SIZE(v) == 0) {
3066 if (newsize == 0) {
3067 return 0;
3068 }
3069 *pv = _PyBytes_FromSize(newsize, 0);
3070 Py_DECREF(v);
3071 return (*pv == NULL) ? -1 : 0;
3072 }
3073 if (Py_REFCNT(v) != 1) {
3074 goto error;
3075 }
3076 if (newsize == 0) {
3077 *pv = bytes_new_empty();
3078 Py_DECREF(v);
3079 return 0;
3080 }
3081 /* XXX UNREF/NEWREF interface should be more symmetrical */
3082 #ifdef Py_REF_DEBUG
3083 _Py_RefTotal--;
3084 #endif
3085 #ifdef Py_TRACE_REFS
3086 _Py_ForgetReference(v);
3087 #endif
3088 *pv = (PyObject *)
3089 PyObject_Realloc(v, PyBytesObject_SIZE + newsize);
3090 if (*pv == NULL) {
3091 PyObject_Free(v);
3092 PyErr_NoMemory();
3093 return -1;
3094 }
3095 _Py_NewReference(*pv);
3096 sv = (PyBytesObject *) *pv;
3097 Py_SET_SIZE(sv, newsize);
3098 sv->ob_sval[newsize] = '\0';
3099 _Py_COMP_DIAG_PUSH
3100 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
3101 sv->ob_shash = -1; /* invalidate cached hash value */
3102 _Py_COMP_DIAG_POP
3103 return 0;
3104 error:
3105 *pv = 0;
3106 Py_DECREF(v);
3107 PyErr_BadInternalCall();
3108 return -1;
3109 }
3110
3111
3112 PyStatus
_PyBytes_InitTypes(PyInterpreterState * interp)3113 _PyBytes_InitTypes(PyInterpreterState *interp)
3114 {
3115 if (!_Py_IsMainInterpreter(interp)) {
3116 return _PyStatus_OK();
3117 }
3118
3119 if (PyType_Ready(&PyBytes_Type) < 0) {
3120 return _PyStatus_ERR("Can't initialize bytes type");
3121 }
3122
3123 if (PyType_Ready(&PyBytesIter_Type) < 0) {
3124 return _PyStatus_ERR("Can't initialize bytes iterator type");
3125 }
3126
3127 return _PyStatus_OK();
3128 }
3129
3130
3131 /*********************** Bytes Iterator ****************************/
3132
/* State of a bytes iterator (instances of PyBytesIter_Type). */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;    /* index of the next byte to yield from it_seq */
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
3138
3139 static void
striter_dealloc(striterobject * it)3140 striter_dealloc(striterobject *it)
3141 {
3142 _PyObject_GC_UNTRACK(it);
3143 Py_XDECREF(it->it_seq);
3144 PyObject_GC_Del(it);
3145 }
3146
3147 static int
striter_traverse(striterobject * it,visitproc visit,void * arg)3148 striter_traverse(striterobject *it, visitproc visit, void *arg)
3149 {
3150 Py_VISIT(it->it_seq);
3151 return 0;
3152 }
3153
3154 static PyObject *
striter_next(striterobject * it)3155 striter_next(striterobject *it)
3156 {
3157 PyBytesObject *seq;
3158
3159 assert(it != NULL);
3160 seq = it->it_seq;
3161 if (seq == NULL)
3162 return NULL;
3163 assert(PyBytes_Check(seq));
3164
3165 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3166 return _PyLong_FromUnsignedChar(
3167 (unsigned char)seq->ob_sval[it->it_index++]);
3168 }
3169
3170 it->it_seq = NULL;
3171 Py_DECREF(seq);
3172 return NULL;
3173 }
3174
3175 static PyObject *
striter_len(striterobject * it,PyObject * Py_UNUSED (ignored))3176 striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
3177 {
3178 Py_ssize_t len = 0;
3179 if (it->it_seq)
3180 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3181 return PyLong_FromSsize_t(len);
3182 }
3183
3184 PyDoc_STRVAR(length_hint_doc,
3185 "Private method returning an estimate of len(list(it)).");
3186
3187 static PyObject *
striter_reduce(striterobject * it,PyObject * Py_UNUSED (ignored))3188 striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
3189 {
3190 PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter));
3191
3192 /* _PyEval_GetBuiltin can invoke arbitrary code,
3193 * call must be before access of iterator pointers.
3194 * see issue #101765 */
3195
3196 if (it->it_seq != NULL) {
3197 return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
3198 } else {
3199 return Py_BuildValue("N(())", iter);
3200 }
3201 }
3202
3203 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3204
3205 static PyObject *
striter_setstate(striterobject * it,PyObject * state)3206 striter_setstate(striterobject *it, PyObject *state)
3207 {
3208 Py_ssize_t index = PyLong_AsSsize_t(state);
3209 if (index == -1 && PyErr_Occurred())
3210 return NULL;
3211 if (it->it_seq != NULL) {
3212 if (index < 0)
3213 index = 0;
3214 else if (index > PyBytes_GET_SIZE(it->it_seq))
3215 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3216 it->it_index = index;
3217 }
3218 Py_RETURN_NONE;
3219 }
3220
3221 PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3222
/* Methods of the bytes iterator (tp_methods of PyBytesIter_Type): length
   hint plus the pickling helpers; terminated by the {NULL, NULL} sentinel. */
static PyMethodDef striter_methods[] = {
    {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
     length_hint_doc},
    {"__reduce__",      (PyCFunction)striter_reduce, METH_NOARGS,
     reduce_doc},
    {"__setstate__",    (PyCFunction)striter_setstate, METH_O,
     setstate_doc},
    {NULL,              NULL}           /* sentinel */
};
3232
/* Type object of the iterator returned by bytes_iter().  It is GC-enabled
   (Py_TPFLAGS_HAVE_GC with a tp_traverse) and returns itself from tp_iter.
   Slots after the last listed one are left to default (zero)
   initialization. */
PyTypeObject PyBytesIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes_iterator",                           /* tp_name */
    sizeof(striterobject),                      /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)striter_dealloc,                /* tp_dealloc */
    0,                                          /* tp_vectorcall_offset */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_as_async */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)striter_traverse,     /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)striter_next,                 /* tp_iternext */
    striter_methods,                            /* tp_methods */
    0,                                          /* tp_members */
};
3265
3266 static PyObject *
bytes_iter(PyObject * seq)3267 bytes_iter(PyObject *seq)
3268 {
3269 striterobject *it;
3270
3271 if (!PyBytes_Check(seq)) {
3272 PyErr_BadInternalCall();
3273 return NULL;
3274 }
3275 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3276 if (it == NULL)
3277 return NULL;
3278 it->it_index = 0;
3279 Py_INCREF(seq);
3280 it->it_seq = (PyBytesObject *)seq;
3281 _PyObject_GC_TRACK(it);
3282 return (PyObject *)it;
3283 }
3284
3285
3286 /* _PyBytesWriter API */
3287
/* OVERALLOCATE_FACTOR is the divisor used by _PyBytesWriter_Resize() when
   overallocation is enabled: the buffer grows by an extra
   size / OVERALLOCATE_FACTOR bytes. */
#ifdef MS_WINDOWS
   /* On Windows, overallocating by 50% (size / 2) is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% (size / 4) is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3295
3296 void
_PyBytesWriter_Init(_PyBytesWriter * writer)3297 _PyBytesWriter_Init(_PyBytesWriter *writer)
3298 {
3299 /* Set all attributes before small_buffer to 0 */
3300 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
3301 #ifndef NDEBUG
3302 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3303 sizeof(writer->small_buffer));
3304 #endif
3305 }
3306
3307 void
_PyBytesWriter_Dealloc(_PyBytesWriter * writer)3308 _PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3309 {
3310 Py_CLEAR(writer->buffer);
3311 }
3312
3313 Py_LOCAL_INLINE(char*)
_PyBytesWriter_AsString(_PyBytesWriter * writer)3314 _PyBytesWriter_AsString(_PyBytesWriter *writer)
3315 {
3316 if (writer->use_small_buffer) {
3317 assert(writer->buffer == NULL);
3318 return writer->small_buffer;
3319 }
3320 else if (writer->use_bytearray) {
3321 assert(writer->buffer != NULL);
3322 return PyByteArray_AS_STRING(writer->buffer);
3323 }
3324 else {
3325 assert(writer->buffer != NULL);
3326 return PyBytes_AS_STRING(writer->buffer);
3327 }
3328 }
3329
3330 Py_LOCAL_INLINE(Py_ssize_t)
_PyBytesWriter_GetSize(_PyBytesWriter * writer,char * str)3331 _PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
3332 {
3333 const char *start = _PyBytesWriter_AsString(writer);
3334 assert(str != NULL);
3335 assert(str >= start);
3336 assert(str - start <= writer->allocated);
3337 return str - start;
3338 }
3339
3340 #ifndef NDEBUG
3341 Py_LOCAL_INLINE(int)
_PyBytesWriter_CheckConsistency(_PyBytesWriter * writer,char * str)3342 _PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3343 {
3344 const char *start, *end;
3345
3346 if (writer->use_small_buffer) {
3347 assert(writer->buffer == NULL);
3348 }
3349 else {
3350 assert(writer->buffer != NULL);
3351 if (writer->use_bytearray)
3352 assert(PyByteArray_CheckExact(writer->buffer));
3353 else
3354 assert(PyBytes_CheckExact(writer->buffer));
3355 assert(Py_REFCNT(writer->buffer) == 1);
3356 }
3357
3358 if (writer->use_bytearray) {
3359 /* bytearray has its own overallocation algorithm,
3360 writer overallocation must be disabled */
3361 assert(!writer->overallocate);
3362 }
3363
3364 assert(0 <= writer->allocated);
3365 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
3366 /* the last byte must always be null */
3367 start = _PyBytesWriter_AsString(writer);
3368 assert(start[writer->allocated] == 0);
3369
3370 end = start + writer->allocated;
3371 assert(str != NULL);
3372 assert(start <= str && str <= end);
3373 return 1;
3374 }
3375 #endif
3376
3377 void*
_PyBytesWriter_Resize(_PyBytesWriter * writer,void * str,Py_ssize_t size)3378 _PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3379 {
3380 Py_ssize_t allocated, pos;
3381
3382 assert(_PyBytesWriter_CheckConsistency(writer, str));
3383 assert(writer->allocated < size);
3384
3385 allocated = size;
3386 if (writer->overallocate
3387 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3388 /* overallocate to limit the number of realloc() */
3389 allocated += allocated / OVERALLOCATE_FACTOR;
3390 }
3391
3392 pos = _PyBytesWriter_GetSize(writer, str);
3393 if (!writer->use_small_buffer) {
3394 if (writer->use_bytearray) {
3395 if (PyByteArray_Resize(writer->buffer, allocated))
3396 goto error;
3397 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3398 but we cannot use ob_alloc because bytes may need to be moved
3399 to use the whole buffer. bytearray uses an internal optimization
3400 to avoid moving or copying bytes when bytes are removed at the
3401 beginning (ex: del bytearray[:1]). */
3402 }
3403 else {
3404 if (_PyBytes_Resize(&writer->buffer, allocated))
3405 goto error;
3406 }
3407 }
3408 else {
3409 /* convert from stack buffer to bytes object buffer */
3410 assert(writer->buffer == NULL);
3411
3412 if (writer->use_bytearray)
3413 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3414 else
3415 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
3416 if (writer->buffer == NULL)
3417 goto error;
3418
3419 if (pos != 0) {
3420 char *dest;
3421 if (writer->use_bytearray)
3422 dest = PyByteArray_AS_STRING(writer->buffer);
3423 else
3424 dest = PyBytes_AS_STRING(writer->buffer);
3425 memcpy(dest,
3426 writer->small_buffer,
3427 pos);
3428 }
3429
3430 writer->use_small_buffer = 0;
3431 #ifndef NDEBUG
3432 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3433 sizeof(writer->small_buffer));
3434 #endif
3435 }
3436 writer->allocated = allocated;
3437
3438 str = _PyBytesWriter_AsString(writer) + pos;
3439 assert(_PyBytesWriter_CheckConsistency(writer, str));
3440 return str;
3441
3442 error:
3443 _PyBytesWriter_Dealloc(writer);
3444 return NULL;
3445 }
3446
3447 void*
_PyBytesWriter_Prepare(_PyBytesWriter * writer,void * str,Py_ssize_t size)3448 _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3449 {
3450 Py_ssize_t new_min_size;
3451
3452 assert(_PyBytesWriter_CheckConsistency(writer, str));
3453 assert(size >= 0);
3454
3455 if (size == 0) {
3456 /* nothing to do */
3457 return str;
3458 }
3459
3460 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3461 PyErr_NoMemory();
3462 _PyBytesWriter_Dealloc(writer);
3463 return NULL;
3464 }
3465 new_min_size = writer->min_size + size;
3466
3467 if (new_min_size > writer->allocated)
3468 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3469
3470 writer->min_size = new_min_size;
3471 return str;
3472 }
3473
3474 /* Allocate the buffer to write size bytes.
3475 Return the pointer to the beginning of buffer data.
3476 Raise an exception and return NULL on error. */
3477 void*
_PyBytesWriter_Alloc(_PyBytesWriter * writer,Py_ssize_t size)3478 _PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3479 {
3480 /* ensure that _PyBytesWriter_Alloc() is only called once */
3481 assert(writer->min_size == 0 && writer->buffer == NULL);
3482 assert(size >= 0);
3483
3484 writer->use_small_buffer = 1;
3485 #ifndef NDEBUG
3486 writer->allocated = sizeof(writer->small_buffer) - 1;
3487 /* In debug mode, don't use the full small buffer because it is less
3488 efficient than bytes and bytearray objects to detect buffer underflow
3489 and buffer overflow. Use 10 bytes of the small buffer to test also
3490 code using the smaller buffer in debug mode.
3491
3492 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3493 in debug mode to also be able to detect stack overflow when running
3494 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3495 if _Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3496 stack overflow. */
3497 writer->allocated = Py_MIN(writer->allocated, 10);
3498 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3499 to detect buffer overflow */
3500 writer->small_buffer[writer->allocated] = 0;
3501 #else
3502 writer->allocated = sizeof(writer->small_buffer);
3503 #endif
3504 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
3505 }
3506
3507 PyObject *
_PyBytesWriter_Finish(_PyBytesWriter * writer,void * str)3508 _PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
3509 {
3510 Py_ssize_t size;
3511 PyObject *result;
3512
3513 assert(_PyBytesWriter_CheckConsistency(writer, str));
3514
3515 size = _PyBytesWriter_GetSize(writer, str);
3516 if (size == 0 && !writer->use_bytearray) {
3517 Py_CLEAR(writer->buffer);
3518 /* Get the empty byte string singleton */
3519 result = PyBytes_FromStringAndSize(NULL, 0);
3520 }
3521 else if (writer->use_small_buffer) {
3522 if (writer->use_bytearray) {
3523 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3524 }
3525 else {
3526 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3527 }
3528 }
3529 else {
3530 result = writer->buffer;
3531 writer->buffer = NULL;
3532
3533 if (size != writer->allocated) {
3534 if (writer->use_bytearray) {
3535 if (PyByteArray_Resize(result, size)) {
3536 Py_DECREF(result);
3537 return NULL;
3538 }
3539 }
3540 else {
3541 if (_PyBytes_Resize(&result, size)) {
3542 assert(result == NULL);
3543 return NULL;
3544 }
3545 }
3546 }
3547 }
3548 return result;
3549 }
3550
3551 void*
_PyBytesWriter_WriteBytes(_PyBytesWriter * writer,void * ptr,const void * bytes,Py_ssize_t size)3552 _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
3553 const void *bytes, Py_ssize_t size)
3554 {
3555 char *str = (char *)ptr;
3556
3557 str = _PyBytesWriter_Prepare(writer, str, size);
3558 if (str == NULL)
3559 return NULL;
3560
3561 memcpy(str, bytes, size);
3562 str += size;
3563
3564 return str;
3565 }
3566
3567
3568 void
_PyBytes_Repeat(char * dest,Py_ssize_t len_dest,const char * src,Py_ssize_t len_src)3569 _PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
3570 const char* src, Py_ssize_t len_src)
3571 {
3572 if (len_dest == 0) {
3573 return;
3574 }
3575 if (len_src == 1) {
3576 memset(dest, src[0], len_dest);
3577 }
3578 else {
3579 if (src != dest) {
3580 memcpy(dest, src, len_src);
3581 }
3582 Py_ssize_t copied = len_src;
3583 while (copied < len_dest) {
3584 Py_ssize_t bytes_to_copy = Py_MIN(copied, len_dest - copied);
3585 memcpy(dest + copied, dest, bytes_to_copy);
3586 copied += bytes_to_copy;
3587 }
3588 }
3589 }
3590
3591