1
2 /* Write Python objects to files and read them back.
3 This is primarily intended for writing and reading compiled Python code,
4 even though dicts, lists, sets and frozensets, not commonly seen in
5 code objects, are supported.
6 Version 3 of this protocol properly supports circular links
7 and sharing. */
8
9 #define PY_SSIZE_T_CLEAN
10
11 #include "Python.h"
12 #include "pycore_call.h" // _PyObject_CallNoArgs()
13 #include "pycore_code.h" // _PyCode_New()
14 #include "pycore_hashtable.h" // _Py_hashtable_t
15 #include "marshal.h" // Py_MARSHAL_VERSION
16
17 /*[clinic input]
18 module marshal
19 [clinic start generated code]*/
20 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=c982b7930dee17db]*/
21
22 #include "clinic/marshal.c.h"
23
/* High water mark to determine when the marshalled object is dangerously deep
 * and risks coring the interpreter.  When the object stack gets this deep,
 * raise an exception instead of continuing.
 *
 * BUG: https://bugs.python.org/issue33720
 * On Windows PGO builds, the r_object function overallocates its stack and
 * can cause a stack overflow.  We reduce the maximum depth for all Windows
 * releases to protect against this.  (Presumably the reduction used to be
 * limited to debug builds, i.e. "defined(MS_WINDOWS) && defined(_DEBUG)".)
 *
 * WASI builds use an intermediate limit; everything else gets the full depth.
 */
#if defined(MS_WINDOWS)
#define MAX_MARSHAL_STACK_DEPTH 1000
#elif defined(__wasi__)
#define MAX_MARSHAL_STACK_DEPTH 1500
#else
#define MAX_MARSHAL_STACK_DEPTH 2000
#endif
42
/* One-byte type codes.  The writer emits one of these (possibly OR-ed with
   FLAG_REF) in front of every value; the reader masks FLAG_REF off the byte
   and switches on the remaining code. */
#define TYPE_NULL               '0'   /* NULL pointer; also terminates a dict */
#define TYPE_NONE               'N'   /* Py_None */
#define TYPE_FALSE              'F'   /* Py_False */
#define TYPE_TRUE               'T'   /* Py_True */
#define TYPE_STOPITER           'S'   /* PyExc_StopIteration */
#define TYPE_ELLIPSIS           '.'   /* Py_Ellipsis */
#define TYPE_INT                'i'   /* int written as 4 bytes */
/* TYPE_INT64 is not generated anymore.
   Supported for backward compatibility only. */
#define TYPE_INT64              'I'
#define TYPE_FLOAT              'f'   /* float written as text */
#define TYPE_BINARY_FLOAT       'g'   /* float written as 8 packed bytes */
#define TYPE_COMPLEX            'x'   /* complex written as two text floats */
#define TYPE_BINARY_COMPLEX     'y'   /* complex written as two packed floats */
#define TYPE_LONG               'l'   /* int written as base-2**15 digits */
#define TYPE_STRING             's'   /* bytes (and other bytes-like objects) */
#define TYPE_INTERNED           't'   /* interned str, UTF-8 encoded */
#define TYPE_REF                'r'   /* index of a previously written object */
#define TYPE_TUPLE              '('
#define TYPE_LIST               '['
#define TYPE_DICT               '{'
#define TYPE_CODE               'c'
#define TYPE_UNICODE            'u'   /* str, UTF-8 encoded */
#define TYPE_UNKNOWN            '?'   /* written for unmarshallable objects */
#define TYPE_SET                '<'
#define TYPE_FROZENSET          '>'
#define FLAG_REF                '\x80' /* with a type, add obj to index */

#define TYPE_ASCII              'a'   /* ASCII-only str, 4-byte length */
#define TYPE_ASCII_INTERNED     'A'
#define TYPE_SMALL_TUPLE        ')'   /* tuple with a 1-byte length */
#define TYPE_SHORT_ASCII        'z'   /* ASCII-only str, 1-byte length */
#define TYPE_SHORT_ASCII_INTERNED 'Z'
76
/* Values stored in WFILE.error by the writer functions. */
#define WFERR_OK 0                /* no error recorded */
#define WFERR_UNMARSHALLABLE 1    /* object (or a size) cannot be marshalled */
#define WFERR_NESTEDTOODEEP 2     /* nesting exceeded MAX_MARSHAL_STACK_DEPTH */
#define WFERR_NOMEMORY 3          /* an allocation or buffer growth failed */
81
/* State of one marshal write operation.  Output goes either to a stdio
   stream (fp != NULL, with buf as a fixed staging buffer) or into a growing
   bytes object (fp == NULL, str != NULL). */
typedef struct {
    FILE *fp;                   /* destination stream, or NULL in bytes mode */
    int error;  /* see WFERR_* values */
    int depth;                  /* current nesting depth of w_object() */
    PyObject *str;              /* bytes object being filled in bytes mode */
    char *ptr;                  /* next byte to write; NULL after a failed resize */
    const char *end;            /* one past the last writable byte of buf */
    char *buf;                  /* start of the output buffer */
    _Py_hashtable_t *hashtable; /* object -> reference index (version >= 3) */
    int version;                /* marshal format version being written */
} WFILE;
93
/* Append the single byte `c` to the output of `p`.  If the buffer is full,
   w_reserve() is asked for one more byte first; when that fails the byte is
   dropped. */
#define w_byte(c, p) do {                                   \
        if ((p)->ptr == (p)->end && !w_reserve((p), 1))     \
            break;                                          \
        *(p)->ptr++ = (c);                                  \
    } while(0)
98
99 static void
w_flush(WFILE * p)100 w_flush(WFILE *p)
101 {
102 assert(p->fp != NULL);
103 fwrite(p->buf, 1, p->ptr - p->buf, p->fp);
104 p->ptr = p->buf;
105 }
106
107 static int
w_reserve(WFILE * p,Py_ssize_t needed)108 w_reserve(WFILE *p, Py_ssize_t needed)
109 {
110 Py_ssize_t pos, size, delta;
111 if (p->ptr == NULL)
112 return 0; /* An error already occurred */
113 if (p->fp != NULL) {
114 w_flush(p);
115 return needed <= p->end - p->ptr;
116 }
117 assert(p->str != NULL);
118 pos = p->ptr - p->buf;
119 size = PyBytes_GET_SIZE(p->str);
120 if (size > 16*1024*1024)
121 delta = (size >> 3); /* 12.5% overallocation */
122 else
123 delta = size + 1024;
124 delta = Py_MAX(delta, needed);
125 if (delta > PY_SSIZE_T_MAX - size) {
126 p->error = WFERR_NOMEMORY;
127 return 0;
128 }
129 size += delta;
130 if (_PyBytes_Resize(&p->str, size) != 0) {
131 p->end = p->ptr = p->buf = NULL;
132 return 0;
133 }
134 else {
135 p->buf = PyBytes_AS_STRING(p->str);
136 p->ptr = p->buf + pos;
137 p->end = p->buf + size;
138 return 1;
139 }
140 }
141
142 static void
w_string(const void * s,Py_ssize_t n,WFILE * p)143 w_string(const void *s, Py_ssize_t n, WFILE *p)
144 {
145 Py_ssize_t m;
146 if (!n || p->ptr == NULL)
147 return;
148 m = p->end - p->ptr;
149 if (p->fp != NULL) {
150 if (n <= m) {
151 memcpy(p->ptr, s, n);
152 p->ptr += n;
153 }
154 else {
155 w_flush(p);
156 fwrite(s, 1, n, p->fp);
157 }
158 }
159 else {
160 if (n <= m || w_reserve(p, n - m)) {
161 memcpy(p->ptr, s, n);
162 p->ptr += n;
163 }
164 }
165 }
166
167 static void
w_short(int x,WFILE * p)168 w_short(int x, WFILE *p)
169 {
170 w_byte((char)( x & 0xff), p);
171 w_byte((char)((x>> 8) & 0xff), p);
172 }
173
174 static void
w_long(long x,WFILE * p)175 w_long(long x, WFILE *p)
176 {
177 w_byte((char)( x & 0xff), p);
178 w_byte((char)((x>> 8) & 0xff), p);
179 w_byte((char)((x>>16) & 0xff), p);
180 w_byte((char)((x>>24) & 0xff), p);
181 }
182
/* Largest size W_SIZE will write; anything bigger is rejected. */
#define SIZE32_MAX  0x7FFFFFFF

/* W_SIZE(n, p): write the size `n` with w_long().  Where size_t is wider
   than 4 bytes, a size above SIZE32_MAX is not written; instead the error
   WFERR_UNMARSHALLABLE is recorded, p->depth is decremented and the macro
   RETURNS from the enclosing (void) function. */
#if SIZEOF_SIZE_T > 4
# define W_SIZE(n, p)  do {                     \
        if ((n) <= SIZE32_MAX) {                \
            w_long((long)(n), p);               \
            break;                              \
        }                                       \
        (p)->depth--;                           \
        (p)->error = WFERR_UNMARSHALLABLE;      \
        return;                                 \
    } while(0)
#else
# define W_SIZE  w_long
#endif
197
/* Write a length-prefixed byte string: the size `n` via W_SIZE, then the
   `n` bytes at `s`.  Where size_t is wider than 4 bytes, W_SIZE returns from
   this function early (recording WFERR_UNMARSHALLABLE) if `n` exceeds
   SIZE32_MAX, in which case nothing is written. */
static void
w_pstring(const void *s, Py_ssize_t n, WFILE *p)
{
    W_SIZE(n, p);   /* may return from w_pstring() */
    w_string(s, n, p);
}
204
/* Write a byte string prefixed by a single length byte.  `n` is narrowed to
   unsigned char with Py_SAFE_DOWNCAST, so callers are expected to pass a
   length that fits in one byte. */
static void
w_short_pstring(const void *s, Py_ssize_t n, WFILE *p)
{
    w_byte(Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char), p);
    w_string(s, n, p);
}
211
/* We assume that Python ints are stored internally in base some power of
   2**15; for the sake of portability we'll always read and write them in base
   exactly 2**15. */

/* Number of bits carried by one marshalled digit. */
#define PyLong_MARSHAL_SHIFT 15
/* The marshal base, 2**15. */
#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT)
/* Mask selecting the low 15 bits of a value. */
#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1)
#if PyLong_SHIFT % PyLong_MARSHAL_SHIFT != 0
#error "PyLong_SHIFT must be a multiple of PyLong_MARSHAL_SHIFT"
#endif
/* How many marshalled digits make up one internal PyLong digit. */
#define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT)
223
/* Write the type byte `t` OR-ed with `flag`.  `flag` is not a macro
   parameter: a variable of that name must be in scope wherever W_TYPE is
   expanded (the writers here pass it as '\0' or with FLAG_REF set). */
#define W_TYPE(t, p) do { \
    w_byte((t) | flag, (p)); \
} while(0)
227
228 static void
w_PyLong(const PyLongObject * ob,char flag,WFILE * p)229 w_PyLong(const PyLongObject *ob, char flag, WFILE *p)
230 {
231 Py_ssize_t i, j, n, l;
232 digit d;
233
234 W_TYPE(TYPE_LONG, p);
235 if (Py_SIZE(ob) == 0) {
236 w_long((long)0, p);
237 return;
238 }
239
240 /* set l to number of base PyLong_MARSHAL_BASE digits */
241 n = Py_ABS(Py_SIZE(ob));
242 l = (n-1) * PyLong_MARSHAL_RATIO;
243 d = ob->ob_digit[n-1];
244 assert(d != 0); /* a PyLong is always normalized */
245 do {
246 d >>= PyLong_MARSHAL_SHIFT;
247 l++;
248 } while (d != 0);
249 if (l > SIZE32_MAX) {
250 p->depth--;
251 p->error = WFERR_UNMARSHALLABLE;
252 return;
253 }
254 w_long((long)(Py_SIZE(ob) > 0 ? l : -l), p);
255
256 for (i=0; i < n-1; i++) {
257 d = ob->ob_digit[i];
258 for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
259 w_short(d & PyLong_MARSHAL_MASK, p);
260 d >>= PyLong_MARSHAL_SHIFT;
261 }
262 assert (d == 0);
263 }
264 d = ob->ob_digit[n-1];
265 do {
266 w_short(d & PyLong_MARSHAL_MASK, p);
267 d >>= PyLong_MARSHAL_SHIFT;
268 } while (d != 0);
269 }
270
271 static void
w_float_bin(double v,WFILE * p)272 w_float_bin(double v, WFILE *p)
273 {
274 char buf[8];
275 if (PyFloat_Pack8(v, buf, 1) < 0) {
276 p->error = WFERR_UNMARSHALLABLE;
277 return;
278 }
279 w_string(buf, 8, p);
280 }
281
282 static void
w_float_str(double v,WFILE * p)283 w_float_str(double v, WFILE *p)
284 {
285 char *buf = PyOS_double_to_string(v, 'g', 17, 0, NULL);
286 if (!buf) {
287 p->error = WFERR_NOMEMORY;
288 return;
289 }
290 w_short_pstring(buf, strlen(buf), p);
291 PyMem_Free(buf);
292 }
293
294 static int
w_ref(PyObject * v,char * flag,WFILE * p)295 w_ref(PyObject *v, char *flag, WFILE *p)
296 {
297 _Py_hashtable_entry_t *entry;
298 int w;
299
300 if (p->version < 3 || p->hashtable == NULL)
301 return 0; /* not writing object references */
302
303 /* If it has only one reference, it definitely isn't shared.
304 * But we use TYPE_REF always for interned string, to PYC file stable
305 * as possible.
306 */
307 if (Py_REFCNT(v) == 1 &&
308 !(PyUnicode_CheckExact(v) && PyUnicode_CHECK_INTERNED(v))) {
309 return 0;
310 }
311
312 entry = _Py_hashtable_get_entry(p->hashtable, v);
313 if (entry != NULL) {
314 /* write the reference index to the stream */
315 w = (int)(uintptr_t)entry->value;
316 /* we don't store "long" indices in the dict */
317 assert(0 <= w && w <= 0x7fffffff);
318 w_byte(TYPE_REF, p);
319 w_long(w, p);
320 return 1;
321 } else {
322 size_t s = p->hashtable->nentries;
323 /* we don't support long indices */
324 if (s >= 0x7fffffff) {
325 PyErr_SetString(PyExc_ValueError, "too many objects");
326 goto err;
327 }
328 w = (int)s;
329 Py_INCREF(v);
330 if (_Py_hashtable_set(p->hashtable, v, (void *)(uintptr_t)w) < 0) {
331 Py_DECREF(v);
332 goto err;
333 }
334 *flag |= FLAG_REF;
335 return 0;
336 }
337 err:
338 p->error = WFERR_UNMARSHALLABLE;
339 return 1;
340 }
341
342 static void
343 w_complex_object(PyObject *v, char flag, WFILE *p);
344
345 static void
w_object(PyObject * v,WFILE * p)346 w_object(PyObject *v, WFILE *p)
347 {
348 char flag = '\0';
349
350 p->depth++;
351
352 if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
353 p->error = WFERR_NESTEDTOODEEP;
354 }
355 else if (v == NULL) {
356 w_byte(TYPE_NULL, p);
357 }
358 else if (v == Py_None) {
359 w_byte(TYPE_NONE, p);
360 }
361 else if (v == PyExc_StopIteration) {
362 w_byte(TYPE_STOPITER, p);
363 }
364 else if (v == Py_Ellipsis) {
365 w_byte(TYPE_ELLIPSIS, p);
366 }
367 else if (v == Py_False) {
368 w_byte(TYPE_FALSE, p);
369 }
370 else if (v == Py_True) {
371 w_byte(TYPE_TRUE, p);
372 }
373 else if (!w_ref(v, &flag, p))
374 w_complex_object(v, flag, p);
375
376 p->depth--;
377 }
378
/* Serialize `v`, an object that w_object() did not write as a bare singleton
   byte and that w_ref() did not replace by a TYPE_REF back-reference.
   `flag` is OR-ed into the type byte by W_TYPE.  Dispatches on the exact
   type of `v`; problems are reported through p->error.

   Note that W_SIZE (where size_t is wider than 4 bytes) can return from
   this function when a size exceeds SIZE32_MAX.

   NOTE(review): several error paths decrement p->depth here, and w_object()
   decrements it again after this function returns -- confirm that the
   double decrement is intended. */
static void
w_complex_object(PyObject *v, char flag, WFILE *p)
{
    Py_ssize_t i, n;

    if (PyLong_CheckExact(v)) {
        int overflow;
        long x = PyLong_AsLongAndOverflow(v, &overflow);
        if (overflow) {
            /* Does not fit in a C long at all. */
            w_PyLong((PyLongObject *)v, flag, p);
        }
        else {
#if SIZEOF_LONG > 4
            /* y is 0 or -1 exactly when x fits in 32 signed bits. */
            long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
            if (y && y != -1) {
                /* Too large for TYPE_INT */
                w_PyLong((PyLongObject*)v, flag, p);
            }
            else
#endif
            {
                W_TYPE(TYPE_INT, p);
                w_long(x, p);
            }
        }
    }
    else if (PyFloat_CheckExact(v)) {
        /* Binary representation from version 2 on, text before that. */
        if (p->version > 1) {
            W_TYPE(TYPE_BINARY_FLOAT, p);
            w_float_bin(PyFloat_AS_DOUBLE(v), p);
        }
        else {
            W_TYPE(TYPE_FLOAT, p);
            w_float_str(PyFloat_AS_DOUBLE(v), p);
        }
    }
    else if (PyComplex_CheckExact(v)) {
        /* Real part first, then imaginary part. */
        if (p->version > 1) {
            W_TYPE(TYPE_BINARY_COMPLEX, p);
            w_float_bin(PyComplex_RealAsDouble(v), p);
            w_float_bin(PyComplex_ImagAsDouble(v), p);
        }
        else {
            W_TYPE(TYPE_COMPLEX, p);
            w_float_str(PyComplex_RealAsDouble(v), p);
            w_float_str(PyComplex_ImagAsDouble(v), p);
        }
    }
    else if (PyBytes_CheckExact(v)) {
        W_TYPE(TYPE_STRING, p);
        w_pstring(PyBytes_AS_STRING(v), PyBytes_GET_SIZE(v), p);
    }
    else if (PyUnicode_CheckExact(v)) {
        if (p->version >= 4 && PyUnicode_IS_ASCII(v)) {
            /* ASCII strings are written raw; short ones get a 1-byte
               length, longer ones a 4-byte length. */
            int is_short = PyUnicode_GET_LENGTH(v) < 256;
            if (is_short) {
                if (PyUnicode_CHECK_INTERNED(v))
                    W_TYPE(TYPE_SHORT_ASCII_INTERNED, p);
                else
                    W_TYPE(TYPE_SHORT_ASCII, p);
                w_short_pstring(PyUnicode_1BYTE_DATA(v),
                                PyUnicode_GET_LENGTH(v), p);
            }
            else {
                if (PyUnicode_CHECK_INTERNED(v))
                    W_TYPE(TYPE_ASCII_INTERNED, p);
                else
                    W_TYPE(TYPE_ASCII, p);
                w_pstring(PyUnicode_1BYTE_DATA(v),
                          PyUnicode_GET_LENGTH(v), p);
            }
        }
        else {
            /* Everything else is written as UTF-8; "surrogatepass" lets
               lone surrogates through the encoder. */
            PyObject *utf8;
            utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass");
            if (utf8 == NULL) {
                p->depth--;
                p->error = WFERR_UNMARSHALLABLE;
                return;
            }
            if (p->version >= 3 &&  PyUnicode_CHECK_INTERNED(v))
                W_TYPE(TYPE_INTERNED, p);
            else
                W_TYPE(TYPE_UNICODE, p);
            w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p);
            Py_DECREF(utf8);
        }
    }
    else if (PyTuple_CheckExact(v)) {
        n = PyTuple_GET_SIZE(v);
        if (p->version >= 4 && n < 256) {
            /* Compact form: length in a single byte. */
            W_TYPE(TYPE_SMALL_TUPLE, p);
            w_byte((unsigned char)n, p);
        }
        else {
            W_TYPE(TYPE_TUPLE, p);
            W_SIZE(n, p);
        }
        for (i = 0; i < n; i++) {
            w_object(PyTuple_GET_ITEM(v, i), p);
        }
    }
    else if (PyList_CheckExact(v)) {
        W_TYPE(TYPE_LIST, p);
        n = PyList_GET_SIZE(v);
        W_SIZE(n, p);
        for (i = 0; i < n; i++) {
            w_object(PyList_GET_ITEM(v, i), p);
        }
    }
    else if (PyDict_CheckExact(v)) {
        Py_ssize_t pos;
        PyObject *key, *value;
        W_TYPE(TYPE_DICT, p);
        /* This one is NULL object terminated! */
        pos = 0;
        while (PyDict_Next(v, &pos, &key, &value)) {
            w_object(key, p);
            w_object(value, p);
        }
        w_object((PyObject *)NULL, p);
    }
    else if (PyAnySet_CheckExact(v)) {
        PyObject *value;
        Py_ssize_t pos = 0;
        Py_hash_t hash;

        if (PyFrozenSet_CheckExact(v))
            W_TYPE(TYPE_FROZENSET, p);
        else
            W_TYPE(TYPE_SET, p);
        n = PySet_GET_SIZE(v);
        W_SIZE(n, p);
        // bpo-37596: To support reproducible builds, sets and frozensets need
        // to have their elements serialized in a consistent order (even when
        // they have been scrambled by hash randomization). To ensure this, we
        // use an order equivalent to sorted(v, key=marshal.dumps):
        PyObject *pairs = PyList_New(n);
        if (pairs == NULL) {
            p->error = WFERR_NOMEMORY;
            return;
        }
        Py_ssize_t i = 0;
        /* Build a list of (marshalled bytes, element) tuples. */
        while (_PySet_NextEntry(v, &pos, &value, &hash)) {
            PyObject *dump = PyMarshal_WriteObjectToString(value, p->version);
            if (dump == NULL) {
                p->error = WFERR_UNMARSHALLABLE;
                Py_DECREF(pairs);
                return;
            }
            PyObject *pair = PyTuple_Pack(2, dump, value);
            Py_DECREF(dump);
            if (pair == NULL) {
                p->error = WFERR_NOMEMORY;
                Py_DECREF(pairs);
                return;
            }
            PyList_SET_ITEM(pairs, i++, pair);
        }
        assert(i == n);
        if (PyList_Sort(pairs)) {
            p->error = WFERR_NOMEMORY;
            Py_DECREF(pairs);
            return;
        }
        /* Emit the elements in the sorted order. */
        for (Py_ssize_t i = 0; i < n; i++) {
            PyObject *pair = PyList_GET_ITEM(pairs, i);
            value = PyTuple_GET_ITEM(pair, 1);
            w_object(value, p);
        }
        Py_DECREF(pairs);
    }
    else if (PyCode_Check(v)) {
        PyCodeObject *co = (PyCodeObject *)v;
        PyObject *co_code = _PyCode_GetCode(co);
        if (co_code == NULL) {
            p->error = WFERR_NOMEMORY;
            return;
        }
        /* The field order below defines the on-disk layout of a code
           object. */
        W_TYPE(TYPE_CODE, p);
        w_long(co->co_argcount, p);
        w_long(co->co_posonlyargcount, p);
        w_long(co->co_kwonlyargcount, p);
        w_long(co->co_stacksize, p);
        w_long(co->co_flags, p);
        w_object(co_code, p);
        w_object(co->co_consts, p);
        w_object(co->co_names, p);
        w_object(co->co_localsplusnames, p);
        w_object(co->co_localspluskinds, p);
        w_object(co->co_filename, p);
        w_object(co->co_name, p);
        w_object(co->co_qualname, p);
        w_long(co->co_firstlineno, p);
        w_object(co->co_linetable, p);
        w_object(co->co_exceptiontable, p);
        Py_DECREF(co_code);
    }
    else if (PyObject_CheckBuffer(v)) {
        /* Write unknown bytes-like objects as a bytes object */
        Py_buffer view;
        if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) != 0) {
            w_byte(TYPE_UNKNOWN, p);
            p->depth--;
            p->error = WFERR_UNMARSHALLABLE;
            return;
        }
        W_TYPE(TYPE_STRING, p);
        w_pstring(view.buf, view.len, p);
        PyBuffer_Release(&view);
    }
    else {
        /* Unsupported type: leave a marker byte and flag the error. */
        W_TYPE(TYPE_UNKNOWN, p);
        p->error = WFERR_UNMARSHALLABLE;
    }
}
595
596 static void
w_decref_entry(void * key)597 w_decref_entry(void *key)
598 {
599 PyObject *entry_key = (PyObject *)key;
600 Py_XDECREF(entry_key);
601 }
602
603 static int
w_init_refs(WFILE * wf,int version)604 w_init_refs(WFILE *wf, int version)
605 {
606 if (version >= 3) {
607 wf->hashtable = _Py_hashtable_new_full(_Py_hashtable_hash_ptr,
608 _Py_hashtable_compare_direct,
609 w_decref_entry, NULL, NULL);
610 if (wf->hashtable == NULL) {
611 PyErr_NoMemory();
612 return -1;
613 }
614 }
615 return 0;
616 }
617
618 static void
w_clear_refs(WFILE * wf)619 w_clear_refs(WFILE *wf)
620 {
621 if (wf->hashtable != NULL) {
622 _Py_hashtable_destroy(wf->hashtable);
623 }
624 }
625
626 /* version currently has no effect for writing ints. */
627 /* Note that while the documentation states that this function
628 * can error, currently it never does. Setting an exception in
629 * this function should be regarded as an API-breaking change.
630 */
631 void
PyMarshal_WriteLongToFile(long x,FILE * fp,int version)632 PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
633 {
634 char buf[4];
635 WFILE wf;
636 memset(&wf, 0, sizeof(wf));
637 wf.fp = fp;
638 wf.ptr = wf.buf = buf;
639 wf.end = wf.ptr + sizeof(buf);
640 wf.error = WFERR_OK;
641 wf.version = version;
642 w_long(x, &wf);
643 w_flush(&wf);
644 }
645
646 void
PyMarshal_WriteObjectToFile(PyObject * x,FILE * fp,int version)647 PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
648 {
649 char buf[BUFSIZ];
650 WFILE wf;
651 if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
652 return; /* caller must check PyErr_Occurred() */
653 }
654 memset(&wf, 0, sizeof(wf));
655 wf.fp = fp;
656 wf.ptr = wf.buf = buf;
657 wf.end = wf.ptr + sizeof(buf);
658 wf.error = WFERR_OK;
659 wf.version = version;
660 if (w_init_refs(&wf, version)) {
661 return; /* caller must check PyErr_Occurred() */
662 }
663 w_object(x, &wf);
664 w_clear_refs(&wf);
665 w_flush(&wf);
666 }
667
/* State of one marshal read operation.  Data comes from an in-memory region
   (ptr != NULL), from a Python object with a readinto() method (readable),
   or from a stdio stream (fp). */
typedef struct {
    FILE *fp;               /* source stream, used when ptr and readable are NULL */
    int depth;              /* current nesting depth of r_object() */
    PyObject *readable;  /* Stream-like object being read from */
    const char *ptr;        /* next unread byte of in-memory data, or NULL */
    const char *end;        /* one past the last byte of in-memory data */
    char *buf;              /* scratch buffer allocated on demand by r_string() */
    Py_ssize_t buf_size;    /* capacity of buf in bytes */
    PyObject *refs;  /* a list */
} RFILE;
678
679 static const char *
r_string(Py_ssize_t n,RFILE * p)680 r_string(Py_ssize_t n, RFILE *p)
681 {
682 Py_ssize_t read = -1;
683
684 if (p->ptr != NULL) {
685 /* Fast path for loads() */
686 const char *res = p->ptr;
687 Py_ssize_t left = p->end - p->ptr;
688 if (left < n) {
689 PyErr_SetString(PyExc_EOFError,
690 "marshal data too short");
691 return NULL;
692 }
693 p->ptr += n;
694 return res;
695 }
696 if (p->buf == NULL) {
697 p->buf = PyMem_Malloc(n);
698 if (p->buf == NULL) {
699 PyErr_NoMemory();
700 return NULL;
701 }
702 p->buf_size = n;
703 }
704 else if (p->buf_size < n) {
705 char *tmp = PyMem_Realloc(p->buf, n);
706 if (tmp == NULL) {
707 PyErr_NoMemory();
708 return NULL;
709 }
710 p->buf = tmp;
711 p->buf_size = n;
712 }
713
714 if (!p->readable) {
715 assert(p->fp != NULL);
716 read = fread(p->buf, 1, n, p->fp);
717 }
718 else {
719 PyObject *res, *mview;
720 Py_buffer buf;
721
722 if (PyBuffer_FillInfo(&buf, NULL, p->buf, n, 0, PyBUF_CONTIG) == -1)
723 return NULL;
724 mview = PyMemoryView_FromBuffer(&buf);
725 if (mview == NULL)
726 return NULL;
727
728 res = _PyObject_CallMethod(p->readable, &_Py_ID(readinto), "N", mview);
729 if (res != NULL) {
730 read = PyNumber_AsSsize_t(res, PyExc_ValueError);
731 Py_DECREF(res);
732 }
733 }
734 if (read != n) {
735 if (!PyErr_Occurred()) {
736 if (read > n)
737 PyErr_Format(PyExc_ValueError,
738 "read() returned too much data: "
739 "%zd bytes requested, %zd returned",
740 n, read);
741 else
742 PyErr_SetString(PyExc_EOFError,
743 "EOF read where not expected");
744 }
745 return NULL;
746 }
747 return p->buf;
748 }
749
750 static int
r_byte(RFILE * p)751 r_byte(RFILE *p)
752 {
753 int c = EOF;
754
755 if (p->ptr != NULL) {
756 if (p->ptr < p->end)
757 c = (unsigned char) *p->ptr++;
758 return c;
759 }
760 if (!p->readable) {
761 assert(p->fp);
762 c = getc(p->fp);
763 }
764 else {
765 const char *ptr = r_string(1, p);
766 if (ptr != NULL)
767 c = *(const unsigned char *) ptr;
768 }
769 return c;
770 }
771
772 static int
r_short(RFILE * p)773 r_short(RFILE *p)
774 {
775 short x = -1;
776 const unsigned char *buffer;
777
778 buffer = (const unsigned char *) r_string(2, p);
779 if (buffer != NULL) {
780 x = buffer[0];
781 x |= buffer[1] << 8;
782 /* Sign-extension, in case short greater than 16 bits */
783 x |= -(x & 0x8000);
784 }
785 return x;
786 }
787
788 static long
r_long(RFILE * p)789 r_long(RFILE *p)
790 {
791 long x = -1;
792 const unsigned char *buffer;
793
794 buffer = (const unsigned char *) r_string(4, p);
795 if (buffer != NULL) {
796 x = buffer[0];
797 x |= (long)buffer[1] << 8;
798 x |= (long)buffer[2] << 16;
799 x |= (long)buffer[3] << 24;
800 #if SIZEOF_LONG > 4
801 /* Sign extension for 64-bit machines */
802 x |= -(x & 0x80000000L);
803 #endif
804 }
805 return x;
806 }
807
808 /* r_long64 deals with the TYPE_INT64 code. */
809 static PyObject *
r_long64(RFILE * p)810 r_long64(RFILE *p)
811 {
812 const unsigned char *buffer = (const unsigned char *) r_string(8, p);
813 if (buffer == NULL) {
814 return NULL;
815 }
816 return _PyLong_FromByteArray(buffer, 8,
817 1 /* little endian */,
818 1 /* signed */);
819 }
820
821 static PyObject *
r_PyLong(RFILE * p)822 r_PyLong(RFILE *p)
823 {
824 PyLongObject *ob;
825 long n, size, i;
826 int j, md, shorts_in_top_digit;
827 digit d;
828
829 n = r_long(p);
830 if (PyErr_Occurred())
831 return NULL;
832 if (n == 0)
833 return (PyObject *)_PyLong_New(0);
834 if (n < -SIZE32_MAX || n > SIZE32_MAX) {
835 PyErr_SetString(PyExc_ValueError,
836 "bad marshal data (long size out of range)");
837 return NULL;
838 }
839
840 size = 1 + (Py_ABS(n) - 1) / PyLong_MARSHAL_RATIO;
841 shorts_in_top_digit = 1 + (Py_ABS(n) - 1) % PyLong_MARSHAL_RATIO;
842 ob = _PyLong_New(size);
843 if (ob == NULL)
844 return NULL;
845
846 Py_SET_SIZE(ob, n > 0 ? size : -size);
847
848 for (i = 0; i < size-1; i++) {
849 d = 0;
850 for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
851 md = r_short(p);
852 if (PyErr_Occurred()) {
853 Py_DECREF(ob);
854 return NULL;
855 }
856 if (md < 0 || md > PyLong_MARSHAL_BASE)
857 goto bad_digit;
858 d += (digit)md << j*PyLong_MARSHAL_SHIFT;
859 }
860 ob->ob_digit[i] = d;
861 }
862
863 d = 0;
864 for (j=0; j < shorts_in_top_digit; j++) {
865 md = r_short(p);
866 if (PyErr_Occurred()) {
867 Py_DECREF(ob);
868 return NULL;
869 }
870 if (md < 0 || md > PyLong_MARSHAL_BASE)
871 goto bad_digit;
872 /* topmost marshal digit should be nonzero */
873 if (md == 0 && j == shorts_in_top_digit - 1) {
874 Py_DECREF(ob);
875 PyErr_SetString(PyExc_ValueError,
876 "bad marshal data (unnormalized long data)");
877 return NULL;
878 }
879 d += (digit)md << j*PyLong_MARSHAL_SHIFT;
880 }
881 if (PyErr_Occurred()) {
882 Py_DECREF(ob);
883 return NULL;
884 }
885 /* top digit should be nonzero, else the resulting PyLong won't be
886 normalized */
887 ob->ob_digit[size-1] = d;
888 return (PyObject *)ob;
889 bad_digit:
890 Py_DECREF(ob);
891 PyErr_SetString(PyExc_ValueError,
892 "bad marshal data (digit out of range in long)");
893 return NULL;
894 }
895
896 static double
r_float_bin(RFILE * p)897 r_float_bin(RFILE *p)
898 {
899 const char *buf = r_string(8, p);
900 if (buf == NULL)
901 return -1;
902 return PyFloat_Unpack8(buf, 1);
903 }
904
905 /* Issue #33720: Disable inlining for reducing the C stack consumption
906 on PGO builds. */
907 Py_NO_INLINE static double
r_float_str(RFILE * p)908 r_float_str(RFILE *p)
909 {
910 int n;
911 char buf[256];
912 const char *ptr;
913 n = r_byte(p);
914 if (n == EOF) {
915 PyErr_SetString(PyExc_EOFError,
916 "EOF read where object expected");
917 return -1;
918 }
919 ptr = r_string(n, p);
920 if (ptr == NULL) {
921 return -1;
922 }
923 memcpy(buf, ptr, n);
924 buf[n] = '\0';
925 return PyOS_string_to_double(buf, NULL, NULL);
926 }
927
928 /* allocate the reflist index for a new object. Return -1 on failure */
929 static Py_ssize_t
r_ref_reserve(int flag,RFILE * p)930 r_ref_reserve(int flag, RFILE *p)
931 {
932 if (flag) { /* currently only FLAG_REF is defined */
933 Py_ssize_t idx = PyList_GET_SIZE(p->refs);
934 if (idx >= 0x7ffffffe) {
935 PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)");
936 return -1;
937 }
938 if (PyList_Append(p->refs, Py_None) < 0)
939 return -1;
940 return idx;
941 } else
942 return 0;
943 }
944
945 /* insert the new object 'o' to the reflist at previously
946 * allocated index 'idx'.
947 * 'o' can be NULL, in which case nothing is done.
948 * if 'o' was non-NULL, and the function succeeds, 'o' is returned.
949 * if 'o' was non-NULL, and the function fails, 'o' is released and
950 * NULL returned. This simplifies error checking at the call site since
951 * a single test for NULL for the function result is enough.
952 */
953 static PyObject *
r_ref_insert(PyObject * o,Py_ssize_t idx,int flag,RFILE * p)954 r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p)
955 {
956 if (o != NULL && flag) { /* currently only FLAG_REF is defined */
957 PyObject *tmp = PyList_GET_ITEM(p->refs, idx);
958 Py_INCREF(o);
959 PyList_SET_ITEM(p->refs, idx, o);
960 Py_DECREF(tmp);
961 }
962 return o;
963 }
964
965 /* combination of both above, used when an object can be
966 * created whenever it is seen in the file, as opposed to
967 * after having loaded its sub-objects.
968 */
969 static PyObject *
r_ref(PyObject * o,int flag,RFILE * p)970 r_ref(PyObject *o, int flag, RFILE *p)
971 {
972 assert(flag & FLAG_REF);
973 if (o == NULL)
974 return NULL;
975 if (PyList_Append(p->refs, o) < 0) {
976 Py_DECREF(o); /* release the new object */
977 return NULL;
978 }
979 return o;
980 }
981
982 static PyObject *
r_object(RFILE * p)983 r_object(RFILE *p)
984 {
985 /* NULL is a valid return value, it does not necessarily means that
986 an exception is set. */
987 PyObject *v, *v2;
988 Py_ssize_t idx = 0;
989 long i, n;
990 int type, code = r_byte(p);
991 int flag, is_interned = 0;
992 PyObject *retval = NULL;
993
994 if (code == EOF) {
995 PyErr_SetString(PyExc_EOFError,
996 "EOF read where object expected");
997 return NULL;
998 }
999
1000 p->depth++;
1001
1002 if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
1003 p->depth--;
1004 PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
1005 return NULL;
1006 }
1007
1008 flag = code & FLAG_REF;
1009 type = code & ~FLAG_REF;
1010
1011 #define R_REF(O) do{\
1012 if (flag) \
1013 O = r_ref(O, flag, p);\
1014 } while (0)
1015
1016 switch (type) {
1017
1018 case TYPE_NULL:
1019 break;
1020
1021 case TYPE_NONE:
1022 Py_INCREF(Py_None);
1023 retval = Py_None;
1024 break;
1025
1026 case TYPE_STOPITER:
1027 Py_INCREF(PyExc_StopIteration);
1028 retval = PyExc_StopIteration;
1029 break;
1030
1031 case TYPE_ELLIPSIS:
1032 Py_INCREF(Py_Ellipsis);
1033 retval = Py_Ellipsis;
1034 break;
1035
1036 case TYPE_FALSE:
1037 Py_INCREF(Py_False);
1038 retval = Py_False;
1039 break;
1040
1041 case TYPE_TRUE:
1042 Py_INCREF(Py_True);
1043 retval = Py_True;
1044 break;
1045
1046 case TYPE_INT:
1047 n = r_long(p);
1048 retval = PyErr_Occurred() ? NULL : PyLong_FromLong(n);
1049 R_REF(retval);
1050 break;
1051
1052 case TYPE_INT64:
1053 retval = r_long64(p);
1054 R_REF(retval);
1055 break;
1056
1057 case TYPE_LONG:
1058 retval = r_PyLong(p);
1059 R_REF(retval);
1060 break;
1061
1062 case TYPE_FLOAT:
1063 {
1064 double x = r_float_str(p);
1065 if (x == -1.0 && PyErr_Occurred())
1066 break;
1067 retval = PyFloat_FromDouble(x);
1068 R_REF(retval);
1069 break;
1070 }
1071
1072 case TYPE_BINARY_FLOAT:
1073 {
1074 double x = r_float_bin(p);
1075 if (x == -1.0 && PyErr_Occurred())
1076 break;
1077 retval = PyFloat_FromDouble(x);
1078 R_REF(retval);
1079 break;
1080 }
1081
1082 case TYPE_COMPLEX:
1083 {
1084 Py_complex c;
1085 c.real = r_float_str(p);
1086 if (c.real == -1.0 && PyErr_Occurred())
1087 break;
1088 c.imag = r_float_str(p);
1089 if (c.imag == -1.0 && PyErr_Occurred())
1090 break;
1091 retval = PyComplex_FromCComplex(c);
1092 R_REF(retval);
1093 break;
1094 }
1095
1096 case TYPE_BINARY_COMPLEX:
1097 {
1098 Py_complex c;
1099 c.real = r_float_bin(p);
1100 if (c.real == -1.0 && PyErr_Occurred())
1101 break;
1102 c.imag = r_float_bin(p);
1103 if (c.imag == -1.0 && PyErr_Occurred())
1104 break;
1105 retval = PyComplex_FromCComplex(c);
1106 R_REF(retval);
1107 break;
1108 }
1109
1110 case TYPE_STRING:
1111 {
1112 const char *ptr;
1113 n = r_long(p);
1114 if (PyErr_Occurred())
1115 break;
1116 if (n < 0 || n > SIZE32_MAX) {
1117 PyErr_SetString(PyExc_ValueError, "bad marshal data (bytes object size out of range)");
1118 break;
1119 }
1120 v = PyBytes_FromStringAndSize((char *)NULL, n);
1121 if (v == NULL)
1122 break;
1123 ptr = r_string(n, p);
1124 if (ptr == NULL) {
1125 Py_DECREF(v);
1126 break;
1127 }
1128 memcpy(PyBytes_AS_STRING(v), ptr, n);
1129 retval = v;
1130 R_REF(retval);
1131 break;
1132 }
1133
1134 case TYPE_ASCII_INTERNED:
1135 is_interned = 1;
1136 /* fall through */
1137 case TYPE_ASCII:
1138 n = r_long(p);
1139 if (PyErr_Occurred())
1140 break;
1141 if (n < 0 || n > SIZE32_MAX) {
1142 PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1143 break;
1144 }
1145 goto _read_ascii;
1146
1147 case TYPE_SHORT_ASCII_INTERNED:
1148 is_interned = 1;
1149 /* fall through */
1150 case TYPE_SHORT_ASCII:
1151 n = r_byte(p);
1152 if (n == EOF) {
1153 PyErr_SetString(PyExc_EOFError,
1154 "EOF read where object expected");
1155 break;
1156 }
1157 _read_ascii:
1158 {
1159 const char *ptr;
1160 ptr = r_string(n, p);
1161 if (ptr == NULL)
1162 break;
1163 v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n);
1164 if (v == NULL)
1165 break;
1166 if (is_interned)
1167 PyUnicode_InternInPlace(&v);
1168 retval = v;
1169 R_REF(retval);
1170 break;
1171 }
1172
1173 case TYPE_INTERNED:
1174 is_interned = 1;
1175 /* fall through */
1176 case TYPE_UNICODE:
1177 {
1178 const char *buffer;
1179
1180 n = r_long(p);
1181 if (PyErr_Occurred())
1182 break;
1183 if (n < 0 || n > SIZE32_MAX) {
1184 PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1185 break;
1186 }
1187 if (n != 0) {
1188 buffer = r_string(n, p);
1189 if (buffer == NULL)
1190 break;
1191 v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
1192 }
1193 else {
1194 v = PyUnicode_New(0, 0);
1195 }
1196 if (v == NULL)
1197 break;
1198 if (is_interned)
1199 PyUnicode_InternInPlace(&v);
1200 retval = v;
1201 R_REF(retval);
1202 break;
1203 }
1204
1205 case TYPE_SMALL_TUPLE:
1206 n = (unsigned char) r_byte(p);
1207 if (PyErr_Occurred())
1208 break;
1209 goto _read_tuple;
1210 case TYPE_TUPLE:
1211 n = r_long(p);
1212 if (PyErr_Occurred())
1213 break;
1214 if (n < 0 || n > SIZE32_MAX) {
1215 PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)");
1216 break;
1217 }
1218 _read_tuple:
1219 v = PyTuple_New(n);
1220 R_REF(v);
1221 if (v == NULL)
1222 break;
1223
1224 for (i = 0; i < n; i++) {
1225 v2 = r_object(p);
1226 if ( v2 == NULL ) {
1227 if (!PyErr_Occurred())
1228 PyErr_SetString(PyExc_TypeError,
1229 "NULL object in marshal data for tuple");
1230 Py_DECREF(v);
1231 v = NULL;
1232 break;
1233 }
1234 PyTuple_SET_ITEM(v, i, v2);
1235 }
1236 retval = v;
1237 break;
1238
1239 case TYPE_LIST:
1240 n = r_long(p);
1241 if (PyErr_Occurred())
1242 break;
1243 if (n < 0 || n > SIZE32_MAX) {
1244 PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)");
1245 break;
1246 }
1247 v = PyList_New(n);
1248 R_REF(v);
1249 if (v == NULL)
1250 break;
1251 for (i = 0; i < n; i++) {
1252 v2 = r_object(p);
1253 if ( v2 == NULL ) {
1254 if (!PyErr_Occurred())
1255 PyErr_SetString(PyExc_TypeError,
1256 "NULL object in marshal data for list");
1257 Py_DECREF(v);
1258 v = NULL;
1259 break;
1260 }
1261 PyList_SET_ITEM(v, i, v2);
1262 }
1263 retval = v;
1264 break;
1265
1266 case TYPE_DICT:
1267 v = PyDict_New();
1268 R_REF(v);
1269 if (v == NULL)
1270 break;
1271 for (;;) {
1272 PyObject *key, *val;
1273 key = r_object(p);
1274 if (key == NULL)
1275 break;
1276 val = r_object(p);
1277 if (val == NULL) {
1278 Py_DECREF(key);
1279 break;
1280 }
1281 if (PyDict_SetItem(v, key, val) < 0) {
1282 Py_DECREF(key);
1283 Py_DECREF(val);
1284 break;
1285 }
1286 Py_DECREF(key);
1287 Py_DECREF(val);
1288 }
1289 if (PyErr_Occurred()) {
1290 Py_DECREF(v);
1291 v = NULL;
1292 }
1293 retval = v;
1294 break;
1295
1296 case TYPE_SET:
1297 case TYPE_FROZENSET:
1298 n = r_long(p);
1299 if (PyErr_Occurred())
1300 break;
1301 if (n < 0 || n > SIZE32_MAX) {
1302 PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)");
1303 break;
1304 }
1305
1306 if (n == 0 && type == TYPE_FROZENSET) {
1307 /* call frozenset() to get the empty frozenset singleton */
1308 v = _PyObject_CallNoArgs((PyObject*)&PyFrozenSet_Type);
1309 if (v == NULL)
1310 break;
1311 R_REF(v);
1312 retval = v;
1313 }
1314 else {
1315 v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
1316 if (type == TYPE_SET) {
1317 R_REF(v);
1318 } else {
1319 /* must use delayed registration of frozensets because they must
1320 * be init with a refcount of 1
1321 */
1322 idx = r_ref_reserve(flag, p);
1323 if (idx < 0)
1324 Py_CLEAR(v); /* signal error */
1325 }
1326 if (v == NULL)
1327 break;
1328
1329 for (i = 0; i < n; i++) {
1330 v2 = r_object(p);
1331 if ( v2 == NULL ) {
1332 if (!PyErr_Occurred())
1333 PyErr_SetString(PyExc_TypeError,
1334 "NULL object in marshal data for set");
1335 Py_DECREF(v);
1336 v = NULL;
1337 break;
1338 }
1339 if (PySet_Add(v, v2) == -1) {
1340 Py_DECREF(v);
1341 Py_DECREF(v2);
1342 v = NULL;
1343 break;
1344 }
1345 Py_DECREF(v2);
1346 }
1347 if (type != TYPE_SET)
1348 v = r_ref_insert(v, idx, flag, p);
1349 retval = v;
1350 }
1351 break;
1352
1353 case TYPE_CODE:
1354 {
1355 int argcount;
1356 int posonlyargcount;
1357 int kwonlyargcount;
1358 int stacksize;
1359 int flags;
1360 PyObject *code = NULL;
1361 PyObject *consts = NULL;
1362 PyObject *names = NULL;
1363 PyObject *localsplusnames = NULL;
1364 PyObject *localspluskinds = NULL;
1365 PyObject *filename = NULL;
1366 PyObject *name = NULL;
1367 PyObject *qualname = NULL;
1368 int firstlineno;
1369 PyObject* linetable = NULL;
1370 PyObject *exceptiontable = NULL;
1371
1372 idx = r_ref_reserve(flag, p);
1373 if (idx < 0)
1374 break;
1375
1376 v = NULL;
1377
1378 /* XXX ignore long->int overflows for now */
1379 argcount = (int)r_long(p);
1380 if (PyErr_Occurred())
1381 goto code_error;
1382 posonlyargcount = (int)r_long(p);
1383 if (PyErr_Occurred()) {
1384 goto code_error;
1385 }
1386 kwonlyargcount = (int)r_long(p);
1387 if (PyErr_Occurred())
1388 goto code_error;
1389 stacksize = (int)r_long(p);
1390 if (PyErr_Occurred())
1391 goto code_error;
1392 flags = (int)r_long(p);
1393 if (PyErr_Occurred())
1394 goto code_error;
1395 code = r_object(p);
1396 if (code == NULL)
1397 goto code_error;
1398 consts = r_object(p);
1399 if (consts == NULL)
1400 goto code_error;
1401 names = r_object(p);
1402 if (names == NULL)
1403 goto code_error;
1404 localsplusnames = r_object(p);
1405 if (localsplusnames == NULL)
1406 goto code_error;
1407 localspluskinds = r_object(p);
1408 if (localspluskinds == NULL)
1409 goto code_error;
1410 filename = r_object(p);
1411 if (filename == NULL)
1412 goto code_error;
1413 name = r_object(p);
1414 if (name == NULL)
1415 goto code_error;
1416 qualname = r_object(p);
1417 if (qualname == NULL)
1418 goto code_error;
1419 firstlineno = (int)r_long(p);
1420 if (firstlineno == -1 && PyErr_Occurred())
1421 break;
1422 linetable = r_object(p);
1423 if (linetable == NULL)
1424 goto code_error;
1425 exceptiontable = r_object(p);
1426 if (exceptiontable == NULL)
1427 goto code_error;
1428
1429 struct _PyCodeConstructor con = {
1430 .filename = filename,
1431 .name = name,
1432 .qualname = qualname,
1433 .flags = flags,
1434
1435 .code = code,
1436 .firstlineno = firstlineno,
1437 .linetable = linetable,
1438
1439 .consts = consts,
1440 .names = names,
1441
1442 .localsplusnames = localsplusnames,
1443 .localspluskinds = localspluskinds,
1444
1445 .argcount = argcount,
1446 .posonlyargcount = posonlyargcount,
1447 .kwonlyargcount = kwonlyargcount,
1448
1449 .stacksize = stacksize,
1450
1451 .exceptiontable = exceptiontable,
1452 };
1453
1454 if (_PyCode_Validate(&con) < 0) {
1455 goto code_error;
1456 }
1457
1458 v = (PyObject *)_PyCode_New(&con);
1459 if (v == NULL) {
1460 goto code_error;
1461 }
1462
1463 v = r_ref_insert(v, idx, flag, p);
1464
1465 code_error:
1466 Py_XDECREF(code);
1467 Py_XDECREF(consts);
1468 Py_XDECREF(names);
1469 Py_XDECREF(localsplusnames);
1470 Py_XDECREF(localspluskinds);
1471 Py_XDECREF(filename);
1472 Py_XDECREF(name);
1473 Py_XDECREF(qualname);
1474 Py_XDECREF(linetable);
1475 Py_XDECREF(exceptiontable);
1476 }
1477 retval = v;
1478 break;
1479
1480 case TYPE_REF:
1481 n = r_long(p);
1482 if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
1483 if (n == -1 && PyErr_Occurred())
1484 break;
1485 PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1486 break;
1487 }
1488 v = PyList_GET_ITEM(p->refs, n);
1489 if (v == Py_None) {
1490 PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1491 break;
1492 }
1493 Py_INCREF(v);
1494 retval = v;
1495 break;
1496
1497 default:
1498 /* Bogus data got written, which isn't ideal.
1499 This will let you keep working and recover. */
1500 PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1501 break;
1502
1503 }
1504 p->depth--;
1505 return retval;
1506 }
1507
1508 static PyObject *
read_object(RFILE * p)1509 read_object(RFILE *p)
1510 {
1511 PyObject *v;
1512 if (PyErr_Occurred()) {
1513 fprintf(stderr, "XXX readobject called with exception set\n");
1514 return NULL;
1515 }
1516 if (p->ptr && p->end) {
1517 if (PySys_Audit("marshal.loads", "y#", p->ptr, (Py_ssize_t)(p->end - p->ptr)) < 0) {
1518 return NULL;
1519 }
1520 } else if (p->fp || p->readable) {
1521 if (PySys_Audit("marshal.load", NULL) < 0) {
1522 return NULL;
1523 }
1524 }
1525 v = r_object(p);
1526 if (v == NULL && !PyErr_Occurred())
1527 PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1528 return v;
1529 }
1530
1531 int
PyMarshal_ReadShortFromFile(FILE * fp)1532 PyMarshal_ReadShortFromFile(FILE *fp)
1533 {
1534 RFILE rf;
1535 int res;
1536 assert(fp);
1537 rf.readable = NULL;
1538 rf.fp = fp;
1539 rf.end = rf.ptr = NULL;
1540 rf.buf = NULL;
1541 res = r_short(&rf);
1542 if (rf.buf != NULL)
1543 PyMem_Free(rf.buf);
1544 return res;
1545 }
1546
1547 long
PyMarshal_ReadLongFromFile(FILE * fp)1548 PyMarshal_ReadLongFromFile(FILE *fp)
1549 {
1550 RFILE rf;
1551 long res;
1552 rf.fp = fp;
1553 rf.readable = NULL;
1554 rf.ptr = rf.end = NULL;
1555 rf.buf = NULL;
1556 res = r_long(&rf);
1557 if (rf.buf != NULL)
1558 PyMem_Free(rf.buf);
1559 return res;
1560 }
1561
1562 /* Return size of file in bytes; < 0 if unknown or INT_MAX if too big */
1563 static off_t
getfilesize(FILE * fp)1564 getfilesize(FILE *fp)
1565 {
1566 struct _Py_stat_struct st;
1567 if (_Py_fstat_noraise(fileno(fp), &st) != 0)
1568 return -1;
1569 #if SIZEOF_OFF_T == 4
1570 else if (st.st_size >= INT_MAX)
1571 return (off_t)INT_MAX;
1572 #endif
1573 else
1574 return (off_t)st.st_size;
1575 }
1576
1577 /* If we can get the size of the file up-front, and it's reasonably small,
1578 * read it in one gulp and delegate to ...FromString() instead. Much quicker
1579 * than reading a byte at a time from file; speeds .pyc imports.
1580 * CAUTION: since this may read the entire remainder of the file, don't
1581 * call it unless you know you're done with the file.
1582 */
1583 PyObject *
PyMarshal_ReadLastObjectFromFile(FILE * fp)1584 PyMarshal_ReadLastObjectFromFile(FILE *fp)
1585 {
1586 /* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
1587 #define REASONABLE_FILE_LIMIT (1L << 18)
1588 off_t filesize;
1589 filesize = getfilesize(fp);
1590 if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
1591 char* pBuf = (char *)PyMem_Malloc(filesize);
1592 if (pBuf != NULL) {
1593 size_t n = fread(pBuf, 1, (size_t)filesize, fp);
1594 PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
1595 PyMem_Free(pBuf);
1596 return v;
1597 }
1598
1599 }
1600 /* We don't have fstat, or we do but the file is larger than
1601 * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1602 */
1603 return PyMarshal_ReadObjectFromFile(fp);
1604
1605 #undef REASONABLE_FILE_LIMIT
1606 }
1607
1608 PyObject *
PyMarshal_ReadObjectFromFile(FILE * fp)1609 PyMarshal_ReadObjectFromFile(FILE *fp)
1610 {
1611 RFILE rf;
1612 PyObject *result;
1613 rf.fp = fp;
1614 rf.readable = NULL;
1615 rf.depth = 0;
1616 rf.ptr = rf.end = NULL;
1617 rf.buf = NULL;
1618 rf.refs = PyList_New(0);
1619 if (rf.refs == NULL)
1620 return NULL;
1621 result = read_object(&rf);
1622 Py_DECREF(rf.refs);
1623 if (rf.buf != NULL)
1624 PyMem_Free(rf.buf);
1625 return result;
1626 }
1627
1628 PyObject *
PyMarshal_ReadObjectFromString(const char * str,Py_ssize_t len)1629 PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
1630 {
1631 RFILE rf;
1632 PyObject *result;
1633 rf.fp = NULL;
1634 rf.readable = NULL;
1635 rf.ptr = str;
1636 rf.end = str + len;
1637 rf.buf = NULL;
1638 rf.depth = 0;
1639 rf.refs = PyList_New(0);
1640 if (rf.refs == NULL)
1641 return NULL;
1642 result = read_object(&rf);
1643 Py_DECREF(rf.refs);
1644 if (rf.buf != NULL)
1645 PyMem_Free(rf.buf);
1646 return result;
1647 }
1648
1649 PyObject *
PyMarshal_WriteObjectToString(PyObject * x,int version)1650 PyMarshal_WriteObjectToString(PyObject *x, int version)
1651 {
1652 WFILE wf;
1653
1654 if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
1655 return NULL;
1656 }
1657 memset(&wf, 0, sizeof(wf));
1658 wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
1659 if (wf.str == NULL)
1660 return NULL;
1661 wf.ptr = wf.buf = PyBytes_AS_STRING(wf.str);
1662 wf.end = wf.ptr + PyBytes_GET_SIZE(wf.str);
1663 wf.error = WFERR_OK;
1664 wf.version = version;
1665 if (w_init_refs(&wf, version)) {
1666 Py_DECREF(wf.str);
1667 return NULL;
1668 }
1669 w_object(x, &wf);
1670 w_clear_refs(&wf);
1671 if (wf.str != NULL) {
1672 const char *base = PyBytes_AS_STRING(wf.str);
1673 if (_PyBytes_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base)) < 0)
1674 return NULL;
1675 }
1676 if (wf.error != WFERR_OK) {
1677 Py_XDECREF(wf.str);
1678 if (wf.error == WFERR_NOMEMORY)
1679 PyErr_NoMemory();
1680 else
1681 PyErr_SetString(PyExc_ValueError,
1682 (wf.error==WFERR_UNMARSHALLABLE)?"unmarshallable object"
1683 :"object too deeply nested to marshal");
1684 return NULL;
1685 }
1686 return wf.str;
1687 }
1688
1689 /* And an interface for Python programs... */
1690 /*[clinic input]
1691 marshal.dump
1692
1693 value: object
1694 Must be a supported type.
1695 file: object
1696 Must be a writeable binary file.
1697 version: int(c_default="Py_MARSHAL_VERSION") = version
1698 Indicates the data format that dump should use.
1699 /
1700
1701 Write the value on the open file.
1702
1703 If the value has (or contains an object that has) an unsupported type, a
1704 ValueError exception is raised - but garbage data will also be written
1705 to the file. The object will not be properly read back by load().
1706 [clinic start generated code]*/
1707
1708 static PyObject *
marshal_dump_impl(PyObject * module,PyObject * value,PyObject * file,int version)1709 marshal_dump_impl(PyObject *module, PyObject *value, PyObject *file,
1710 int version)
1711 /*[clinic end generated code: output=aaee62c7028a7cb2 input=6c7a3c23c6fef556]*/
1712 {
1713 /* XXX Quick hack -- need to do this differently */
1714 PyObject *s;
1715 PyObject *res;
1716
1717 s = PyMarshal_WriteObjectToString(value, version);
1718 if (s == NULL)
1719 return NULL;
1720 res = _PyObject_CallMethodOneArg(file, &_Py_ID(write), s);
1721 Py_DECREF(s);
1722 return res;
1723 }
1724
1725 /*[clinic input]
1726 marshal.load
1727
1728 file: object
1729 Must be readable binary file.
1730 /
1731
1732 Read one value from the open file and return it.
1733
1734 If no valid value is read (e.g. because the data has a different Python
1735 version's incompatible marshal format), raise EOFError, ValueError or
1736 TypeError.
1737
1738 Note: If an object containing an unsupported type was marshalled with
1739 dump(), load() will substitute None for the unmarshallable type.
1740 [clinic start generated code]*/
1741
1742 static PyObject *
marshal_load(PyObject * module,PyObject * file)1743 marshal_load(PyObject *module, PyObject *file)
1744 /*[clinic end generated code: output=f8e5c33233566344 input=c85c2b594cd8124a]*/
1745 {
1746 PyObject *data, *result;
1747 RFILE rf;
1748
1749 /*
1750 * Make a call to the read method, but read zero bytes.
1751 * This is to ensure that the object passed in at least
1752 * has a read method which returns bytes.
1753 * This can be removed if we guarantee good error handling
1754 * for r_string()
1755 */
1756 data = _PyObject_CallMethod(file, &_Py_ID(read), "i", 0);
1757 if (data == NULL)
1758 return NULL;
1759 if (!PyBytes_Check(data)) {
1760 PyErr_Format(PyExc_TypeError,
1761 "file.read() returned not bytes but %.100s",
1762 Py_TYPE(data)->tp_name);
1763 result = NULL;
1764 }
1765 else {
1766 rf.depth = 0;
1767 rf.fp = NULL;
1768 rf.readable = file;
1769 rf.ptr = rf.end = NULL;
1770 rf.buf = NULL;
1771 if ((rf.refs = PyList_New(0)) != NULL) {
1772 result = read_object(&rf);
1773 Py_DECREF(rf.refs);
1774 if (rf.buf != NULL)
1775 PyMem_Free(rf.buf);
1776 } else
1777 result = NULL;
1778 }
1779 Py_DECREF(data);
1780 return result;
1781 }
1782
1783 /*[clinic input]
1784 marshal.dumps
1785
1786 value: object
1787 Must be a supported type.
1788 version: int(c_default="Py_MARSHAL_VERSION") = version
1789 Indicates the data format that dumps should use.
1790 /
1791
1792 Return the bytes object that would be written to a file by dump(value, file).
1793
1794 Raise a ValueError exception if value has (or contains an object that has) an
1795 unsupported type.
1796 [clinic start generated code]*/
1797
1798 static PyObject *
marshal_dumps_impl(PyObject * module,PyObject * value,int version)1799 marshal_dumps_impl(PyObject *module, PyObject *value, int version)
1800 /*[clinic end generated code: output=9c200f98d7256cad input=a2139ea8608e9b27]*/
1801 {
1802 return PyMarshal_WriteObjectToString(value, version);
1803 }
1804
1805 /*[clinic input]
1806 marshal.loads
1807
1808 bytes: Py_buffer
1809 /
1810
1811 Convert the bytes-like object to a value.
1812
1813 If no valid value is found, raise EOFError, ValueError or TypeError. Extra
1814 bytes in the input are ignored.
1815 [clinic start generated code]*/
1816
1817 static PyObject *
marshal_loads_impl(PyObject * module,Py_buffer * bytes)1818 marshal_loads_impl(PyObject *module, Py_buffer *bytes)
1819 /*[clinic end generated code: output=9fc65985c93d1bb1 input=6f426518459c8495]*/
1820 {
1821 RFILE rf;
1822 char *s = bytes->buf;
1823 Py_ssize_t n = bytes->len;
1824 PyObject* result;
1825 rf.fp = NULL;
1826 rf.readable = NULL;
1827 rf.ptr = s;
1828 rf.end = s + n;
1829 rf.depth = 0;
1830 if ((rf.refs = PyList_New(0)) == NULL)
1831 return NULL;
1832 result = read_object(&rf);
1833 Py_DECREF(rf.refs);
1834 return result;
1835 }
1836
/* Method table for the marshal module; installed through
 * marshalmodule.m_methods.  The *_METHODDEF macros are not defined in this
 * part of the file -- presumably they come from the Argument Clinic output
 * in clinic/marshal.c.h, which is included near the top.
 */
static PyMethodDef marshal_methods[] = {
    MARSHAL_DUMP_METHODDEF
    MARSHAL_LOAD_METHODDEF
    MARSHAL_DUMPS_METHODDEF
    MARSHAL_LOADS_METHODDEF
    {NULL, NULL} /* sentinel */
};
1844
1845
/* Module-level docstring; installed through marshalmodule.m_doc.
 *
 * NOTE(review): the text below warns that recursive lists and dictionaries
 * "will cause infinite loops", while the comment at the top of this file
 * says version 3 of the protocol properly supports circular links.  The
 * version list also stops at version 3.  Confirm whether this wording is
 * still accurate for the current Py_MARSHAL_VERSION before relying on it.
 */
PyDoc_STRVAR(module_doc,
"This module contains functions that can read and write Python values in\n\
a binary format. The format is specific to Python, but independent of\n\
machine architecture issues.\n\
\n\
Not all Python object types are supported; in general, only objects\n\
whose value is independent from a particular invocation of Python can be\n\
written and read by this module. The following types are supported:\n\
None, integers, floating point numbers, strings, bytes, bytearrays,\n\
tuples, lists, sets, dictionaries, and code objects, where it\n\
should be understood that tuples, lists and dictionaries are only\n\
supported as long as the values contained therein are themselves\n\
supported; and recursive lists and dictionaries should not be written\n\
(they will cause infinite loops).\n\
\n\
Variables:\n\
\n\
version -- indicates the format that the module uses. Version 0 is the\n\
historical format, version 1 shares interned strings and version 2\n\
uses a binary format for floating point numbers.\n\
Version 3 shares common object references (New in version 3.4).\n\
\n\
Functions:\n\
\n\
dump() -- write value to a file\n\
load() -- read value from a file\n\
dumps() -- marshal value as a bytes object\n\
loads() -- read value from a bytes-like object");
1874
1875
1876 static int
marshal_module_exec(PyObject * mod)1877 marshal_module_exec(PyObject *mod)
1878 {
1879 if (PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION) < 0) {
1880 return -1;
1881 }
1882 return 0;
1883 }
1884
/* Slot table for the module definition: a single Py_mod_exec slot that runs
 * marshal_module_exec(), followed by the {0, NULL} terminator entry.
 */
static PyModuleDef_Slot marshalmodule_slots[] = {
    {Py_mod_exec, marshal_module_exec},
    {0, NULL}
};
1889
/* Definition of the "marshal" module: ties together its name, docstring,
 * method table and slot table.  Passed to PyModuleDef_Init() by
 * PyMarshal_Init().
 */
static struct PyModuleDef marshalmodule = {
    PyModuleDef_HEAD_INIT,
    .m_name = "marshal",
    .m_doc = module_doc,
    .m_methods = marshal_methods,
    .m_slots = marshalmodule_slots,
};
1897
1898 PyMODINIT_FUNC
PyMarshal_Init(void)1899 PyMarshal_Init(void)
1900 {
1901 return PyModuleDef_Init(&marshalmodule);
1902 }
1903