1 
2 /* Write Python objects to files and read them back.
3    This is primarily intended for writing and reading compiled Python code,
4    even though dicts, lists, sets and frozensets, not commonly seen in
5    code objects, are supported.
6    Version 3 of this protocol properly supports circular links
7    and sharing. */
8 
9 #define PY_SSIZE_T_CLEAN
10 
11 #include "Python.h"
12 #include "pycore_call.h"          // _PyObject_CallNoArgs()
13 #include "pycore_code.h"          // _PyCode_New()
14 #include "pycore_hashtable.h"     // _Py_hashtable_t
15 #include "marshal.h"              // Py_MARSHAL_VERSION
16 
17 /*[clinic input]
18 module marshal
19 [clinic start generated code]*/
20 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=c982b7930dee17db]*/
21 
22 #include "clinic/marshal.c.h"
23 
/* High water mark to determine when the marshalled object is dangerously deep
 * and risks coring the interpreter.  When the object stack gets this deep,
 * raise an exception instead of continuing.
 *
 * The limit is reduced on every Windows build (not only debug builds) and,
 * to a lesser degree, on WASI.
 *
 * BUG: https://bugs.python.org/issue33720
 * On Windows PGO builds, the r_object function overallocates its stack and
 * can cause a stack overflow. We reduce the maximum depth for all Windows
 * releases to protect against this.
 *
 * NOTE(review): the condition below presumably replaced the narrower
 * "#if defined(MS_WINDOWS) && defined(_DEBUG)".
 */
#if defined(MS_WINDOWS)
#define MAX_MARSHAL_STACK_DEPTH 1000
#elif defined(__wasi__)
#define MAX_MARSHAL_STACK_DEPTH 1500
#else
#define MAX_MARSHAL_STACK_DEPTH 2000
#endif
42 
/* One-byte type codes.  The writer emits one of these in front of every
   object (see W_TYPE / w_byte) and r_object() switches on it. */
#define TYPE_NULL               '0'
#define TYPE_NONE               'N'
#define TYPE_FALSE              'F'
#define TYPE_TRUE               'T'
#define TYPE_STOPITER           'S'
#define TYPE_ELLIPSIS           '.'
#define TYPE_INT                'i'
/* TYPE_INT64 is not generated anymore.
   Supported for backward compatibility only. */
#define TYPE_INT64              'I'
#define TYPE_FLOAT              'f'
#define TYPE_BINARY_FLOAT       'g'
#define TYPE_COMPLEX            'x'
#define TYPE_BINARY_COMPLEX     'y'
#define TYPE_LONG               'l'
#define TYPE_STRING             's'
#define TYPE_INTERNED           't'
#define TYPE_REF                'r'
#define TYPE_TUPLE              '('
#define TYPE_LIST               '['
#define TYPE_DICT               '{'
#define TYPE_CODE               'c'
#define TYPE_UNICODE            'u'
#define TYPE_UNKNOWN            '?'
#define TYPE_SET                '<'
#define TYPE_FROZENSET          '>'
/* High bit OR-ed into the type byte by the writer and masked off again by
   the reader. */
#define FLAG_REF                '\x80' /* with a type, add obj to index */

/* Compact codes; the writer only emits these when version >= 4. */
#define TYPE_ASCII              'a'
#define TYPE_ASCII_INTERNED     'A'
#define TYPE_SMALL_TUPLE        ')'
#define TYPE_SHORT_ASCII        'z'
#define TYPE_SHORT_ASCII_INTERNED 'Z'
76 
/* Writer status codes stored in WFILE.error. */
#define WFERR_OK 0
#define WFERR_UNMARSHALLABLE 1   /* object (or size) cannot be written */
#define WFERR_NESTEDTOODEEP 2    /* MAX_MARSHAL_STACK_DEPTH exceeded */
#define WFERR_NOMEMORY 3         /* an allocation failed */
81 
/* State of one marshal write operation.  Output goes either to a stdio
   stream (fp != NULL, buffered through buf) or to a growing bytes object
   (str != NULL); see w_reserve() for how the two modes differ. */
typedef struct {
    FILE *fp;                   /* target stream, or NULL when writing to str */
    int error;  /* see WFERR_* values */
    int depth;                  /* current w_object() nesting level */
    PyObject *str;              /* bytes object being filled when fp is NULL */
    char *ptr;                  /* next byte to write; NULL after a failed resize */
    const char *end;            /* one past the last usable byte of buf */
    char *buf;                  /* start of the current output buffer */
    _Py_hashtable_t *hashtable; /* object -> reference index (version >= 3) */
    int version;                /* marshal format version being written */
} WFILE;
93 
/* Append the single byte `c` to writer `p`.  When the buffer is full,
   w_reserve() is asked for one more byte; if that fails the byte is
   silently dropped. */
#define w_byte(c, p) do {                                       \
        if ((p)->ptr == (p)->end && !w_reserve((p), 1)) {       \
            break;                                              \
        }                                                       \
        *(p)->ptr++ = (c);                                      \
    } while(0)
98 
99 static void
w_flush(WFILE * p)100 w_flush(WFILE *p)
101 {
102     assert(p->fp != NULL);
103     fwrite(p->buf, 1, p->ptr - p->buf, p->fp);
104     p->ptr = p->buf;
105 }
106 
107 static int
w_reserve(WFILE * p,Py_ssize_t needed)108 w_reserve(WFILE *p, Py_ssize_t needed)
109 {
110     Py_ssize_t pos, size, delta;
111     if (p->ptr == NULL)
112         return 0; /* An error already occurred */
113     if (p->fp != NULL) {
114         w_flush(p);
115         return needed <= p->end - p->ptr;
116     }
117     assert(p->str != NULL);
118     pos = p->ptr - p->buf;
119     size = PyBytes_GET_SIZE(p->str);
120     if (size > 16*1024*1024)
121         delta = (size >> 3);            /* 12.5% overallocation */
122     else
123         delta = size + 1024;
124     delta = Py_MAX(delta, needed);
125     if (delta > PY_SSIZE_T_MAX - size) {
126         p->error = WFERR_NOMEMORY;
127         return 0;
128     }
129     size += delta;
130     if (_PyBytes_Resize(&p->str, size) != 0) {
131         p->end = p->ptr = p->buf = NULL;
132         return 0;
133     }
134     else {
135         p->buf = PyBytes_AS_STRING(p->str);
136         p->ptr = p->buf + pos;
137         p->end = p->buf + size;
138         return 1;
139     }
140 }
141 
142 static void
w_string(const void * s,Py_ssize_t n,WFILE * p)143 w_string(const void *s, Py_ssize_t n, WFILE *p)
144 {
145     Py_ssize_t m;
146     if (!n || p->ptr == NULL)
147         return;
148     m = p->end - p->ptr;
149     if (p->fp != NULL) {
150         if (n <= m) {
151             memcpy(p->ptr, s, n);
152             p->ptr += n;
153         }
154         else {
155             w_flush(p);
156             fwrite(s, 1, n, p->fp);
157         }
158     }
159     else {
160         if (n <= m || w_reserve(p, n - m)) {
161             memcpy(p->ptr, s, n);
162             p->ptr += n;
163         }
164     }
165 }
166 
167 static void
w_short(int x,WFILE * p)168 w_short(int x, WFILE *p)
169 {
170     w_byte((char)( x      & 0xff), p);
171     w_byte((char)((x>> 8) & 0xff), p);
172 }
173 
174 static void
w_long(long x,WFILE * p)175 w_long(long x, WFILE *p)
176 {
177     w_byte((char)( x      & 0xff), p);
178     w_byte((char)((x>> 8) & 0xff), p);
179     w_byte((char)((x>>16) & 0xff), p);
180     w_byte((char)((x>>24) & 0xff), p);
181 }
182 
/* Largest size/count that fits the 4-byte signed size field. */
#define SIZE32_MAX  0x7FFFFFFF

/* W_SIZE(n, p): write the size `n` as a 4-byte field.
 *
 * Where size_t is wider than 4 bytes, a size above SIZE32_MAX marks the
 * writer WFERR_UNMARSHALLABLE, decrements p->depth and RETURNS FROM THE
 * ENCLOSING FUNCTION, so the macro is only usable inside functions
 * returning void.  Elsewhere it is simply w_long().
 */
#if SIZEOF_SIZE_T > 4
# define W_SIZE(n, p)  do {                     \
        if ((n) > SIZE32_MAX) {                 \
            (p)->depth--;                       \
            (p)->error = WFERR_UNMARSHALLABLE;  \
            return;                             \
        }                                       \
        w_long((long)(n), p);                   \
    } while(0)
#else
# define W_SIZE  w_long
#endif
197 
198 static void
w_pstring(const void * s,Py_ssize_t n,WFILE * p)199 w_pstring(const void *s, Py_ssize_t n, WFILE *p)
200 {
201         W_SIZE(n, p);
202         w_string(s, n, p);
203 }
204 
205 static void
w_short_pstring(const void * s,Py_ssize_t n,WFILE * p)206 w_short_pstring(const void *s, Py_ssize_t n, WFILE *p)
207 {
208     w_byte(Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char), p);
209     w_string(s, n, p);
210 }
211 
/* We assume that Python ints are stored internally in a base that is some
   power of 2**15; for the sake of portability we always read and write them
   in base exactly 2**15. */

#define PyLong_MARSHAL_SHIFT 15
#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT)
#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1)
#if PyLong_SHIFT % PyLong_MARSHAL_SHIFT != 0
#error "PyLong_SHIFT must be a multiple of PyLong_MARSHAL_SHIFT"
#endif
/* Number of 15-bit marshal digits stored per internal PyLong digit. */
#define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT)
223 
/* Write type code `t` combined with the caller's local variable `flag`
   (which must be in scope at the expansion site). */
#define W_TYPE(t, p) do { \
    w_byte((t) | flag, (p)); \
} while(0)
227 
228 static void
w_PyLong(const PyLongObject * ob,char flag,WFILE * p)229 w_PyLong(const PyLongObject *ob, char flag, WFILE *p)
230 {
231     Py_ssize_t i, j, n, l;
232     digit d;
233 
234     W_TYPE(TYPE_LONG, p);
235     if (Py_SIZE(ob) == 0) {
236         w_long((long)0, p);
237         return;
238     }
239 
240     /* set l to number of base PyLong_MARSHAL_BASE digits */
241     n = Py_ABS(Py_SIZE(ob));
242     l = (n-1) * PyLong_MARSHAL_RATIO;
243     d = ob->ob_digit[n-1];
244     assert(d != 0); /* a PyLong is always normalized */
245     do {
246         d >>= PyLong_MARSHAL_SHIFT;
247         l++;
248     } while (d != 0);
249     if (l > SIZE32_MAX) {
250         p->depth--;
251         p->error = WFERR_UNMARSHALLABLE;
252         return;
253     }
254     w_long((long)(Py_SIZE(ob) > 0 ? l : -l), p);
255 
256     for (i=0; i < n-1; i++) {
257         d = ob->ob_digit[i];
258         for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
259             w_short(d & PyLong_MARSHAL_MASK, p);
260             d >>= PyLong_MARSHAL_SHIFT;
261         }
262         assert (d == 0);
263     }
264     d = ob->ob_digit[n-1];
265     do {
266         w_short(d & PyLong_MARSHAL_MASK, p);
267         d >>= PyLong_MARSHAL_SHIFT;
268     } while (d != 0);
269 }
270 
271 static void
w_float_bin(double v,WFILE * p)272 w_float_bin(double v, WFILE *p)
273 {
274     char buf[8];
275     if (PyFloat_Pack8(v, buf, 1) < 0) {
276         p->error = WFERR_UNMARSHALLABLE;
277         return;
278     }
279     w_string(buf, 8, p);
280 }
281 
282 static void
w_float_str(double v,WFILE * p)283 w_float_str(double v, WFILE *p)
284 {
285     char *buf = PyOS_double_to_string(v, 'g', 17, 0, NULL);
286     if (!buf) {
287         p->error = WFERR_NOMEMORY;
288         return;
289     }
290     w_short_pstring(buf, strlen(buf), p);
291     PyMem_Free(buf);
292 }
293 
294 static int
w_ref(PyObject * v,char * flag,WFILE * p)295 w_ref(PyObject *v, char *flag, WFILE *p)
296 {
297     _Py_hashtable_entry_t *entry;
298     int w;
299 
300     if (p->version < 3 || p->hashtable == NULL)
301         return 0; /* not writing object references */
302 
303     /* If it has only one reference, it definitely isn't shared.
304      * But we use TYPE_REF always for interned string, to PYC file stable
305      * as possible.
306      */
307     if (Py_REFCNT(v) == 1 &&
308             !(PyUnicode_CheckExact(v) && PyUnicode_CHECK_INTERNED(v))) {
309         return 0;
310     }
311 
312     entry = _Py_hashtable_get_entry(p->hashtable, v);
313     if (entry != NULL) {
314         /* write the reference index to the stream */
315         w = (int)(uintptr_t)entry->value;
316         /* we don't store "long" indices in the dict */
317         assert(0 <= w && w <= 0x7fffffff);
318         w_byte(TYPE_REF, p);
319         w_long(w, p);
320         return 1;
321     } else {
322         size_t s = p->hashtable->nentries;
323         /* we don't support long indices */
324         if (s >= 0x7fffffff) {
325             PyErr_SetString(PyExc_ValueError, "too many objects");
326             goto err;
327         }
328         w = (int)s;
329         Py_INCREF(v);
330         if (_Py_hashtable_set(p->hashtable, v, (void *)(uintptr_t)w) < 0) {
331             Py_DECREF(v);
332             goto err;
333         }
334         *flag |= FLAG_REF;
335         return 0;
336     }
337 err:
338     p->error = WFERR_UNMARSHALLABLE;
339     return 1;
340 }
341 
342 static void
343 w_complex_object(PyObject *v, char flag, WFILE *p);
344 
345 static void
w_object(PyObject * v,WFILE * p)346 w_object(PyObject *v, WFILE *p)
347 {
348     char flag = '\0';
349 
350     p->depth++;
351 
352     if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
353         p->error = WFERR_NESTEDTOODEEP;
354     }
355     else if (v == NULL) {
356         w_byte(TYPE_NULL, p);
357     }
358     else if (v == Py_None) {
359         w_byte(TYPE_NONE, p);
360     }
361     else if (v == PyExc_StopIteration) {
362         w_byte(TYPE_STOPITER, p);
363     }
364     else if (v == Py_Ellipsis) {
365         w_byte(TYPE_ELLIPSIS, p);
366     }
367     else if (v == Py_False) {
368         w_byte(TYPE_FALSE, p);
369     }
370     else if (v == Py_True) {
371         w_byte(TYPE_TRUE, p);
372     }
373     else if (!w_ref(v, &flag, p))
374         w_complex_object(v, flag, p);
375 
376     p->depth--;
377 }
378 
/* Serialize any object that is not one of the singletons handled directly
 * by w_object().
 *
 * `flag` is OR-ed into the type byte by W_TYPE (it carries FLAG_REF when
 * w_ref() registered the object).  Dispatch is on the exact type: int,
 * float, complex, bytes, str, tuple, list, dict, set/frozenset, code
 * object, then any object supporting the buffer protocol.  Anything else is
 * written as TYPE_UNKNOWN and flags the writer WFERR_UNMARSHALLABLE.
 *
 * Errors are reported through p->error only.  Note that W_SIZE may return
 * from this function early when a size does not fit the 4-byte field.
 */
static void
w_complex_object(PyObject *v, char flag, WFILE *p)
{
    Py_ssize_t i, n;

    if (PyLong_CheckExact(v)) {
        int overflow;
        long x = PyLong_AsLongAndOverflow(v, &overflow);
        if (overflow) {
            /* Does not even fit a C long. */
            w_PyLong((PyLongObject *)v, flag, p);
        }
        else {
#if SIZEOF_LONG > 4
            /* y is 0 or -1 exactly when x fits in 32 signed bits. */
            long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
            if (y && y != -1) {
                /* Too large for TYPE_INT */
                w_PyLong((PyLongObject*)v, flag, p);
            }
            else
#endif
            {
                W_TYPE(TYPE_INT, p);
                w_long(x, p);
            }
        }
    }
    else if (PyFloat_CheckExact(v)) {
        /* Binary form for version > 1, text form otherwise. */
        if (p->version > 1) {
            W_TYPE(TYPE_BINARY_FLOAT, p);
            w_float_bin(PyFloat_AS_DOUBLE(v), p);
        }
        else {
            W_TYPE(TYPE_FLOAT, p);
            w_float_str(PyFloat_AS_DOUBLE(v), p);
        }
    }
    else if (PyComplex_CheckExact(v)) {
        /* Real part first, then imaginary part. */
        if (p->version > 1) {
            W_TYPE(TYPE_BINARY_COMPLEX, p);
            w_float_bin(PyComplex_RealAsDouble(v), p);
            w_float_bin(PyComplex_ImagAsDouble(v), p);
        }
        else {
            W_TYPE(TYPE_COMPLEX, p);
            w_float_str(PyComplex_RealAsDouble(v), p);
            w_float_str(PyComplex_ImagAsDouble(v), p);
        }
    }
    else if (PyBytes_CheckExact(v)) {
        W_TYPE(TYPE_STRING, p);
        w_pstring(PyBytes_AS_STRING(v), PyBytes_GET_SIZE(v), p);
    }
    else if (PyUnicode_CheckExact(v)) {
        if (p->version >= 4 && PyUnicode_IS_ASCII(v)) {
            /* ASCII strings are written raw; those shorter than 256
               characters use a one-byte length. */
            int is_short = PyUnicode_GET_LENGTH(v) < 256;
            if (is_short) {
                if (PyUnicode_CHECK_INTERNED(v))
                    W_TYPE(TYPE_SHORT_ASCII_INTERNED, p);
                else
                    W_TYPE(TYPE_SHORT_ASCII, p);
                w_short_pstring(PyUnicode_1BYTE_DATA(v),
                                PyUnicode_GET_LENGTH(v), p);
            }
            else {
                if (PyUnicode_CHECK_INTERNED(v))
                    W_TYPE(TYPE_ASCII_INTERNED, p);
                else
                    W_TYPE(TYPE_ASCII, p);
                w_pstring(PyUnicode_1BYTE_DATA(v),
                          PyUnicode_GET_LENGTH(v), p);
            }
        }
        else {
            /* Everything else is written as UTF-8, with lone surrogates
               passed through. */
            PyObject *utf8;
            utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass");
            if (utf8 == NULL) {
                /* NOTE(review): w_object() decrements depth again after
                   this function returns. */
                p->depth--;
                p->error = WFERR_UNMARSHALLABLE;
                return;
            }
            if (p->version >= 3 &&  PyUnicode_CHECK_INTERNED(v))
                W_TYPE(TYPE_INTERNED, p);
            else
                W_TYPE(TYPE_UNICODE, p);
            w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p);
            Py_DECREF(utf8);
        }
    }
    else if (PyTuple_CheckExact(v)) {
        n = PyTuple_GET_SIZE(v);
        if (p->version >= 4 && n < 256) {
            /* Small tuples use a one-byte length. */
            W_TYPE(TYPE_SMALL_TUPLE, p);
            w_byte((unsigned char)n, p);
        }
        else {
            W_TYPE(TYPE_TUPLE, p);
            W_SIZE(n, p);
        }
        for (i = 0; i < n; i++) {
            w_object(PyTuple_GET_ITEM(v, i), p);
        }
    }
    else if (PyList_CheckExact(v)) {
        W_TYPE(TYPE_LIST, p);
        n = PyList_GET_SIZE(v);
        W_SIZE(n, p);
        for (i = 0; i < n; i++) {
            w_object(PyList_GET_ITEM(v, i), p);
        }
    }
    else if (PyDict_CheckExact(v)) {
        Py_ssize_t pos;
        PyObject *key, *value;
        W_TYPE(TYPE_DICT, p);
        /* This one is NULL object terminated! */
        pos = 0;
        while (PyDict_Next(v, &pos, &key, &value)) {
            w_object(key, p);
            w_object(value, p);
        }
        w_object((PyObject *)NULL, p);
    }
    else if (PyAnySet_CheckExact(v)) {
        PyObject *value;
        Py_ssize_t pos = 0;
        Py_hash_t hash;

        if (PyFrozenSet_CheckExact(v))
            W_TYPE(TYPE_FROZENSET, p);
        else
            W_TYPE(TYPE_SET, p);
        n = PySet_GET_SIZE(v);
        W_SIZE(n, p);
        // bpo-37596: To support reproducible builds, sets and frozensets need
        // to have their elements serialized in a consistent order (even when
        // they have been scrambled by hash randomization). To ensure this, we
        // use an order equivalent to sorted(v, key=marshal.dumps):
        PyObject *pairs = PyList_New(n);
        if (pairs == NULL) {
            p->error = WFERR_NOMEMORY;
            return;
        }
        // Build a list of (dumped bytes, element) tuples.  This `i` shadows
        // the function-level `i`.
        Py_ssize_t i = 0;
        while (_PySet_NextEntry(v, &pos, &value, &hash)) {
            PyObject *dump = PyMarshal_WriteObjectToString(value, p->version);
            if (dump == NULL) {
                p->error = WFERR_UNMARSHALLABLE;
                Py_DECREF(pairs);
                return;
            }
            PyObject *pair = PyTuple_Pack(2, dump, value);
            Py_DECREF(dump);
            if (pair == NULL) {
                p->error = WFERR_NOMEMORY;
                Py_DECREF(pairs);
                return;
            }
            PyList_SET_ITEM(pairs, i++, pair);
        }
        assert(i == n);
        if (PyList_Sort(pairs)) {
            // Any sort failure is reported as WFERR_NOMEMORY.
            p->error = WFERR_NOMEMORY;
            Py_DECREF(pairs);
            return;
        }
        // Emit the elements themselves in sorted-dump order.
        for (Py_ssize_t i = 0; i < n; i++) {
            PyObject *pair = PyList_GET_ITEM(pairs, i);
            value = PyTuple_GET_ITEM(pair, 1);
            w_object(value, p);
        }
        Py_DECREF(pairs);
    }
    else if (PyCode_Check(v)) {
        PyCodeObject *co = (PyCodeObject *)v;
        /* co_code is a new reference obtained here and released once all
           fields have been written. */
        PyObject *co_code = _PyCode_GetCode(co);
        if (co_code == NULL) {
            p->error = WFERR_NOMEMORY;
            return;
        }
        W_TYPE(TYPE_CODE, p);
        /* Field order below defines the on-disk layout of a code object. */
        w_long(co->co_argcount, p);
        w_long(co->co_posonlyargcount, p);
        w_long(co->co_kwonlyargcount, p);
        w_long(co->co_stacksize, p);
        w_long(co->co_flags, p);
        w_object(co_code, p);
        w_object(co->co_consts, p);
        w_object(co->co_names, p);
        w_object(co->co_localsplusnames, p);
        w_object(co->co_localspluskinds, p);
        w_object(co->co_filename, p);
        w_object(co->co_name, p);
        w_object(co->co_qualname, p);
        w_long(co->co_firstlineno, p);
        w_object(co->co_linetable, p);
        w_object(co->co_exceptiontable, p);
        Py_DECREF(co_code);
    }
    else if (PyObject_CheckBuffer(v)) {
        /* Write unknown bytes-like objects as a bytes object */
        Py_buffer view;
        if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) != 0) {
            w_byte(TYPE_UNKNOWN, p);
            /* NOTE(review): w_object() decrements depth again after this
               function returns. */
            p->depth--;
            p->error = WFERR_UNMARSHALLABLE;
            return;
        }
        W_TYPE(TYPE_STRING, p);
        w_pstring(view.buf, view.len, p);
        PyBuffer_Release(&view);
    }
    else {
        W_TYPE(TYPE_UNKNOWN, p);
        p->error = WFERR_UNMARSHALLABLE;
    }
}
595 
596 static void
w_decref_entry(void * key)597 w_decref_entry(void *key)
598 {
599     PyObject *entry_key = (PyObject *)key;
600     Py_XDECREF(entry_key);
601 }
602 
603 static int
w_init_refs(WFILE * wf,int version)604 w_init_refs(WFILE *wf, int version)
605 {
606     if (version >= 3) {
607         wf->hashtable = _Py_hashtable_new_full(_Py_hashtable_hash_ptr,
608                                                _Py_hashtable_compare_direct,
609                                                w_decref_entry, NULL, NULL);
610         if (wf->hashtable == NULL) {
611             PyErr_NoMemory();
612             return -1;
613         }
614     }
615     return 0;
616 }
617 
618 static void
w_clear_refs(WFILE * wf)619 w_clear_refs(WFILE *wf)
620 {
621     if (wf->hashtable != NULL) {
622         _Py_hashtable_destroy(wf->hashtable);
623     }
624 }
625 
626 /* version currently has no effect for writing ints. */
627 /* Note that while the documentation states that this function
628  * can error, currently it never does. Setting an exception in
629  * this function should be regarded as an API-breaking change.
630  */
631 void
PyMarshal_WriteLongToFile(long x,FILE * fp,int version)632 PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
633 {
634     char buf[4];
635     WFILE wf;
636     memset(&wf, 0, sizeof(wf));
637     wf.fp = fp;
638     wf.ptr = wf.buf = buf;
639     wf.end = wf.ptr + sizeof(buf);
640     wf.error = WFERR_OK;
641     wf.version = version;
642     w_long(x, &wf);
643     w_flush(&wf);
644 }
645 
646 void
PyMarshal_WriteObjectToFile(PyObject * x,FILE * fp,int version)647 PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
648 {
649     char buf[BUFSIZ];
650     WFILE wf;
651     if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
652         return; /* caller must check PyErr_Occurred() */
653     }
654     memset(&wf, 0, sizeof(wf));
655     wf.fp = fp;
656     wf.ptr = wf.buf = buf;
657     wf.end = wf.ptr + sizeof(buf);
658     wf.error = WFERR_OK;
659     wf.version = version;
660     if (w_init_refs(&wf, version)) {
661         return; /* caller must check PyErr_Occurred() */
662     }
663     w_object(x, &wf);
664     w_clear_refs(&wf);
665     w_flush(&wf);
666 }
667 
/* State of one marshal read operation.  Input comes from an in-memory
   region (ptr != NULL), from a Python object with a readinto() method
   (readable != NULL), or from a stdio stream (fp); see r_string(). */
typedef struct {
    FILE *fp;            /* stdio source, used when ptr and readable are NULL */
    int depth;           /* current r_object() nesting level */
    PyObject *readable;  /* Stream-like object being read from */
    const char *ptr;     /* read cursor for in-memory input, else NULL */
    const char *end;     /* one past the last byte of in-memory input */
    char *buf;           /* scratch buffer for stream/file reads */
    Py_ssize_t buf_size; /* allocated size of buf */
    PyObject *refs;  /* a list */
} RFILE;
678 
679 static const char *
r_string(Py_ssize_t n,RFILE * p)680 r_string(Py_ssize_t n, RFILE *p)
681 {
682     Py_ssize_t read = -1;
683 
684     if (p->ptr != NULL) {
685         /* Fast path for loads() */
686         const char *res = p->ptr;
687         Py_ssize_t left = p->end - p->ptr;
688         if (left < n) {
689             PyErr_SetString(PyExc_EOFError,
690                             "marshal data too short");
691             return NULL;
692         }
693         p->ptr += n;
694         return res;
695     }
696     if (p->buf == NULL) {
697         p->buf = PyMem_Malloc(n);
698         if (p->buf == NULL) {
699             PyErr_NoMemory();
700             return NULL;
701         }
702         p->buf_size = n;
703     }
704     else if (p->buf_size < n) {
705         char *tmp = PyMem_Realloc(p->buf, n);
706         if (tmp == NULL) {
707             PyErr_NoMemory();
708             return NULL;
709         }
710         p->buf = tmp;
711         p->buf_size = n;
712     }
713 
714     if (!p->readable) {
715         assert(p->fp != NULL);
716         read = fread(p->buf, 1, n, p->fp);
717     }
718     else {
719         PyObject *res, *mview;
720         Py_buffer buf;
721 
722         if (PyBuffer_FillInfo(&buf, NULL, p->buf, n, 0, PyBUF_CONTIG) == -1)
723             return NULL;
724         mview = PyMemoryView_FromBuffer(&buf);
725         if (mview == NULL)
726             return NULL;
727 
728         res = _PyObject_CallMethod(p->readable, &_Py_ID(readinto), "N", mview);
729         if (res != NULL) {
730             read = PyNumber_AsSsize_t(res, PyExc_ValueError);
731             Py_DECREF(res);
732         }
733     }
734     if (read != n) {
735         if (!PyErr_Occurred()) {
736             if (read > n)
737                 PyErr_Format(PyExc_ValueError,
738                              "read() returned too much data: "
739                              "%zd bytes requested, %zd returned",
740                              n, read);
741             else
742                 PyErr_SetString(PyExc_EOFError,
743                                 "EOF read where not expected");
744         }
745         return NULL;
746     }
747     return p->buf;
748 }
749 
750 static int
r_byte(RFILE * p)751 r_byte(RFILE *p)
752 {
753     int c = EOF;
754 
755     if (p->ptr != NULL) {
756         if (p->ptr < p->end)
757             c = (unsigned char) *p->ptr++;
758         return c;
759     }
760     if (!p->readable) {
761         assert(p->fp);
762         c = getc(p->fp);
763     }
764     else {
765         const char *ptr = r_string(1, p);
766         if (ptr != NULL)
767             c = *(const unsigned char *) ptr;
768     }
769     return c;
770 }
771 
772 static int
r_short(RFILE * p)773 r_short(RFILE *p)
774 {
775     short x = -1;
776     const unsigned char *buffer;
777 
778     buffer = (const unsigned char *) r_string(2, p);
779     if (buffer != NULL) {
780         x = buffer[0];
781         x |= buffer[1] << 8;
782         /* Sign-extension, in case short greater than 16 bits */
783         x |= -(x & 0x8000);
784     }
785     return x;
786 }
787 
788 static long
r_long(RFILE * p)789 r_long(RFILE *p)
790 {
791     long x = -1;
792     const unsigned char *buffer;
793 
794     buffer = (const unsigned char *) r_string(4, p);
795     if (buffer != NULL) {
796         x = buffer[0];
797         x |= (long)buffer[1] << 8;
798         x |= (long)buffer[2] << 16;
799         x |= (long)buffer[3] << 24;
800 #if SIZEOF_LONG > 4
801         /* Sign extension for 64-bit machines */
802         x |= -(x & 0x80000000L);
803 #endif
804     }
805     return x;
806 }
807 
808 /* r_long64 deals with the TYPE_INT64 code. */
809 static PyObject *
r_long64(RFILE * p)810 r_long64(RFILE *p)
811 {
812     const unsigned char *buffer = (const unsigned char *) r_string(8, p);
813     if (buffer == NULL) {
814         return NULL;
815     }
816     return _PyLong_FromByteArray(buffer, 8,
817                                  1 /* little endian */,
818                                  1 /* signed */);
819 }
820 
821 static PyObject *
r_PyLong(RFILE * p)822 r_PyLong(RFILE *p)
823 {
824     PyLongObject *ob;
825     long n, size, i;
826     int j, md, shorts_in_top_digit;
827     digit d;
828 
829     n = r_long(p);
830     if (PyErr_Occurred())
831         return NULL;
832     if (n == 0)
833         return (PyObject *)_PyLong_New(0);
834     if (n < -SIZE32_MAX || n > SIZE32_MAX) {
835         PyErr_SetString(PyExc_ValueError,
836                        "bad marshal data (long size out of range)");
837         return NULL;
838     }
839 
840     size = 1 + (Py_ABS(n) - 1) / PyLong_MARSHAL_RATIO;
841     shorts_in_top_digit = 1 + (Py_ABS(n) - 1) % PyLong_MARSHAL_RATIO;
842     ob = _PyLong_New(size);
843     if (ob == NULL)
844         return NULL;
845 
846     Py_SET_SIZE(ob, n > 0 ? size : -size);
847 
848     for (i = 0; i < size-1; i++) {
849         d = 0;
850         for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
851             md = r_short(p);
852             if (PyErr_Occurred()) {
853                 Py_DECREF(ob);
854                 return NULL;
855             }
856             if (md < 0 || md > PyLong_MARSHAL_BASE)
857                 goto bad_digit;
858             d += (digit)md << j*PyLong_MARSHAL_SHIFT;
859         }
860         ob->ob_digit[i] = d;
861     }
862 
863     d = 0;
864     for (j=0; j < shorts_in_top_digit; j++) {
865         md = r_short(p);
866         if (PyErr_Occurred()) {
867             Py_DECREF(ob);
868             return NULL;
869         }
870         if (md < 0 || md > PyLong_MARSHAL_BASE)
871             goto bad_digit;
872         /* topmost marshal digit should be nonzero */
873         if (md == 0 && j == shorts_in_top_digit - 1) {
874             Py_DECREF(ob);
875             PyErr_SetString(PyExc_ValueError,
876                 "bad marshal data (unnormalized long data)");
877             return NULL;
878         }
879         d += (digit)md << j*PyLong_MARSHAL_SHIFT;
880     }
881     if (PyErr_Occurred()) {
882         Py_DECREF(ob);
883         return NULL;
884     }
885     /* top digit should be nonzero, else the resulting PyLong won't be
886        normalized */
887     ob->ob_digit[size-1] = d;
888     return (PyObject *)ob;
889   bad_digit:
890     Py_DECREF(ob);
891     PyErr_SetString(PyExc_ValueError,
892                     "bad marshal data (digit out of range in long)");
893     return NULL;
894 }
895 
896 static double
r_float_bin(RFILE * p)897 r_float_bin(RFILE *p)
898 {
899     const char *buf = r_string(8, p);
900     if (buf == NULL)
901         return -1;
902     return PyFloat_Unpack8(buf, 1);
903 }
904 
905 /* Issue #33720: Disable inlining for reducing the C stack consumption
906    on PGO builds. */
907 Py_NO_INLINE static double
r_float_str(RFILE * p)908 r_float_str(RFILE *p)
909 {
910     int n;
911     char buf[256];
912     const char *ptr;
913     n = r_byte(p);
914     if (n == EOF) {
915         PyErr_SetString(PyExc_EOFError,
916             "EOF read where object expected");
917         return -1;
918     }
919     ptr = r_string(n, p);
920     if (ptr == NULL) {
921         return -1;
922     }
923     memcpy(buf, ptr, n);
924     buf[n] = '\0';
925     return PyOS_string_to_double(buf, NULL, NULL);
926 }
927 
928 /* allocate the reflist index for a new object. Return -1 on failure */
929 static Py_ssize_t
r_ref_reserve(int flag,RFILE * p)930 r_ref_reserve(int flag, RFILE *p)
931 {
932     if (flag) { /* currently only FLAG_REF is defined */
933         Py_ssize_t idx = PyList_GET_SIZE(p->refs);
934         if (idx >= 0x7ffffffe) {
935             PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)");
936             return -1;
937         }
938         if (PyList_Append(p->refs, Py_None) < 0)
939             return -1;
940         return idx;
941     } else
942         return 0;
943 }
944 
945 /* insert the new object 'o' to the reflist at previously
946  * allocated index 'idx'.
947  * 'o' can be NULL, in which case nothing is done.
948  * if 'o' was non-NULL, and the function succeeds, 'o' is returned.
949  * if 'o' was non-NULL, and the function fails, 'o' is released and
950  * NULL returned. This simplifies error checking at the call site since
951  * a single test for NULL for the function result is enough.
952  */
953 static PyObject *
r_ref_insert(PyObject * o,Py_ssize_t idx,int flag,RFILE * p)954 r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p)
955 {
956     if (o != NULL && flag) { /* currently only FLAG_REF is defined */
957         PyObject *tmp = PyList_GET_ITEM(p->refs, idx);
958         Py_INCREF(o);
959         PyList_SET_ITEM(p->refs, idx, o);
960         Py_DECREF(tmp);
961     }
962     return o;
963 }
964 
965 /* combination of both above, used when an object can be
966  * created whenever it is seen in the file, as opposed to
967  * after having loaded its sub-objects.
968  */
969 static PyObject *
r_ref(PyObject * o,int flag,RFILE * p)970 r_ref(PyObject *o, int flag, RFILE *p)
971 {
972     assert(flag & FLAG_REF);
973     if (o == NULL)
974         return NULL;
975     if (PyList_Append(p->refs, o) < 0) {
976         Py_DECREF(o); /* release the new object */
977         return NULL;
978     }
979     return o;
980 }
981 
982 static PyObject *
r_object(RFILE * p)983 r_object(RFILE *p)
984 {
985     /* NULL is a valid return value, it does not necessarily means that
986        an exception is set. */
987     PyObject *v, *v2;
988     Py_ssize_t idx = 0;
989     long i, n;
990     int type, code = r_byte(p);
991     int flag, is_interned = 0;
992     PyObject *retval = NULL;
993 
994     if (code == EOF) {
995         PyErr_SetString(PyExc_EOFError,
996                         "EOF read where object expected");
997         return NULL;
998     }
999 
1000     p->depth++;
1001 
1002     if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
1003         p->depth--;
1004         PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
1005         return NULL;
1006     }
1007 
1008     flag = code & FLAG_REF;
1009     type = code & ~FLAG_REF;
1010 
1011 #define R_REF(O) do{\
1012     if (flag) \
1013         O = r_ref(O, flag, p);\
1014 } while (0)
1015 
1016     switch (type) {
1017 
1018     case TYPE_NULL:
1019         break;
1020 
1021     case TYPE_NONE:
1022         Py_INCREF(Py_None);
1023         retval = Py_None;
1024         break;
1025 
1026     case TYPE_STOPITER:
1027         Py_INCREF(PyExc_StopIteration);
1028         retval = PyExc_StopIteration;
1029         break;
1030 
1031     case TYPE_ELLIPSIS:
1032         Py_INCREF(Py_Ellipsis);
1033         retval = Py_Ellipsis;
1034         break;
1035 
1036     case TYPE_FALSE:
1037         Py_INCREF(Py_False);
1038         retval = Py_False;
1039         break;
1040 
1041     case TYPE_TRUE:
1042         Py_INCREF(Py_True);
1043         retval = Py_True;
1044         break;
1045 
1046     case TYPE_INT:
1047         n = r_long(p);
1048         retval = PyErr_Occurred() ? NULL : PyLong_FromLong(n);
1049         R_REF(retval);
1050         break;
1051 
1052     case TYPE_INT64:
1053         retval = r_long64(p);
1054         R_REF(retval);
1055         break;
1056 
1057     case TYPE_LONG:
1058         retval = r_PyLong(p);
1059         R_REF(retval);
1060         break;
1061 
1062     case TYPE_FLOAT:
1063         {
1064             double x = r_float_str(p);
1065             if (x == -1.0 && PyErr_Occurred())
1066                 break;
1067             retval = PyFloat_FromDouble(x);
1068             R_REF(retval);
1069             break;
1070         }
1071 
1072     case TYPE_BINARY_FLOAT:
1073         {
1074             double x = r_float_bin(p);
1075             if (x == -1.0 && PyErr_Occurred())
1076                 break;
1077             retval = PyFloat_FromDouble(x);
1078             R_REF(retval);
1079             break;
1080         }
1081 
1082     case TYPE_COMPLEX:
1083         {
1084             Py_complex c;
1085             c.real = r_float_str(p);
1086             if (c.real == -1.0 && PyErr_Occurred())
1087                 break;
1088             c.imag = r_float_str(p);
1089             if (c.imag == -1.0 && PyErr_Occurred())
1090                 break;
1091             retval = PyComplex_FromCComplex(c);
1092             R_REF(retval);
1093             break;
1094         }
1095 
1096     case TYPE_BINARY_COMPLEX:
1097         {
1098             Py_complex c;
1099             c.real = r_float_bin(p);
1100             if (c.real == -1.0 && PyErr_Occurred())
1101                 break;
1102             c.imag = r_float_bin(p);
1103             if (c.imag == -1.0 && PyErr_Occurred())
1104                 break;
1105             retval = PyComplex_FromCComplex(c);
1106             R_REF(retval);
1107             break;
1108         }
1109 
1110     case TYPE_STRING:
1111         {
1112             const char *ptr;
1113             n = r_long(p);
1114             if (PyErr_Occurred())
1115                 break;
1116             if (n < 0 || n > SIZE32_MAX) {
1117                 PyErr_SetString(PyExc_ValueError, "bad marshal data (bytes object size out of range)");
1118                 break;
1119             }
1120             v = PyBytes_FromStringAndSize((char *)NULL, n);
1121             if (v == NULL)
1122                 break;
1123             ptr = r_string(n, p);
1124             if (ptr == NULL) {
1125                 Py_DECREF(v);
1126                 break;
1127             }
1128             memcpy(PyBytes_AS_STRING(v), ptr, n);
1129             retval = v;
1130             R_REF(retval);
1131             break;
1132         }
1133 
1134     case TYPE_ASCII_INTERNED:
1135         is_interned = 1;
1136         /* fall through */
1137     case TYPE_ASCII:
1138         n = r_long(p);
1139         if (PyErr_Occurred())
1140             break;
1141         if (n < 0 || n > SIZE32_MAX) {
1142             PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1143             break;
1144         }
1145         goto _read_ascii;
1146 
1147     case TYPE_SHORT_ASCII_INTERNED:
1148         is_interned = 1;
1149         /* fall through */
1150     case TYPE_SHORT_ASCII:
1151         n = r_byte(p);
1152         if (n == EOF) {
1153             PyErr_SetString(PyExc_EOFError,
1154                 "EOF read where object expected");
1155             break;
1156         }
1157     _read_ascii:
1158         {
1159             const char *ptr;
1160             ptr = r_string(n, p);
1161             if (ptr == NULL)
1162                 break;
1163             v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n);
1164             if (v == NULL)
1165                 break;
1166             if (is_interned)
1167                 PyUnicode_InternInPlace(&v);
1168             retval = v;
1169             R_REF(retval);
1170             break;
1171         }
1172 
1173     case TYPE_INTERNED:
1174         is_interned = 1;
1175         /* fall through */
1176     case TYPE_UNICODE:
1177         {
1178         const char *buffer;
1179 
1180         n = r_long(p);
1181         if (PyErr_Occurred())
1182             break;
1183         if (n < 0 || n > SIZE32_MAX) {
1184             PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1185             break;
1186         }
1187         if (n != 0) {
1188             buffer = r_string(n, p);
1189             if (buffer == NULL)
1190                 break;
1191             v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
1192         }
1193         else {
1194             v = PyUnicode_New(0, 0);
1195         }
1196         if (v == NULL)
1197             break;
1198         if (is_interned)
1199             PyUnicode_InternInPlace(&v);
1200         retval = v;
1201         R_REF(retval);
1202         break;
1203         }
1204 
1205     case TYPE_SMALL_TUPLE:
1206         n = (unsigned char) r_byte(p);
1207         if (PyErr_Occurred())
1208             break;
1209         goto _read_tuple;
1210     case TYPE_TUPLE:
1211         n = r_long(p);
1212         if (PyErr_Occurred())
1213             break;
1214         if (n < 0 || n > SIZE32_MAX) {
1215             PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)");
1216             break;
1217         }
1218     _read_tuple:
1219         v = PyTuple_New(n);
1220         R_REF(v);
1221         if (v == NULL)
1222             break;
1223 
1224         for (i = 0; i < n; i++) {
1225             v2 = r_object(p);
1226             if ( v2 == NULL ) {
1227                 if (!PyErr_Occurred())
1228                     PyErr_SetString(PyExc_TypeError,
1229                         "NULL object in marshal data for tuple");
1230                 Py_DECREF(v);
1231                 v = NULL;
1232                 break;
1233             }
1234             PyTuple_SET_ITEM(v, i, v2);
1235         }
1236         retval = v;
1237         break;
1238 
1239     case TYPE_LIST:
1240         n = r_long(p);
1241         if (PyErr_Occurred())
1242             break;
1243         if (n < 0 || n > SIZE32_MAX) {
1244             PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)");
1245             break;
1246         }
1247         v = PyList_New(n);
1248         R_REF(v);
1249         if (v == NULL)
1250             break;
1251         for (i = 0; i < n; i++) {
1252             v2 = r_object(p);
1253             if ( v2 == NULL ) {
1254                 if (!PyErr_Occurred())
1255                     PyErr_SetString(PyExc_TypeError,
1256                         "NULL object in marshal data for list");
1257                 Py_DECREF(v);
1258                 v = NULL;
1259                 break;
1260             }
1261             PyList_SET_ITEM(v, i, v2);
1262         }
1263         retval = v;
1264         break;
1265 
1266     case TYPE_DICT:
1267         v = PyDict_New();
1268         R_REF(v);
1269         if (v == NULL)
1270             break;
1271         for (;;) {
1272             PyObject *key, *val;
1273             key = r_object(p);
1274             if (key == NULL)
1275                 break;
1276             val = r_object(p);
1277             if (val == NULL) {
1278                 Py_DECREF(key);
1279                 break;
1280             }
1281             if (PyDict_SetItem(v, key, val) < 0) {
1282                 Py_DECREF(key);
1283                 Py_DECREF(val);
1284                 break;
1285             }
1286             Py_DECREF(key);
1287             Py_DECREF(val);
1288         }
1289         if (PyErr_Occurred()) {
1290             Py_DECREF(v);
1291             v = NULL;
1292         }
1293         retval = v;
1294         break;
1295 
1296     case TYPE_SET:
1297     case TYPE_FROZENSET:
1298         n = r_long(p);
1299         if (PyErr_Occurred())
1300             break;
1301         if (n < 0 || n > SIZE32_MAX) {
1302             PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)");
1303             break;
1304         }
1305 
1306         if (n == 0 && type == TYPE_FROZENSET) {
1307             /* call frozenset() to get the empty frozenset singleton */
1308             v = _PyObject_CallNoArgs((PyObject*)&PyFrozenSet_Type);
1309             if (v == NULL)
1310                 break;
1311             R_REF(v);
1312             retval = v;
1313         }
1314         else {
1315             v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
1316             if (type == TYPE_SET) {
1317                 R_REF(v);
1318             } else {
1319                 /* must use delayed registration of frozensets because they must
1320                  * be init with a refcount of 1
1321                  */
1322                 idx = r_ref_reserve(flag, p);
1323                 if (idx < 0)
1324                     Py_CLEAR(v); /* signal error */
1325             }
1326             if (v == NULL)
1327                 break;
1328 
1329             for (i = 0; i < n; i++) {
1330                 v2 = r_object(p);
1331                 if ( v2 == NULL ) {
1332                     if (!PyErr_Occurred())
1333                         PyErr_SetString(PyExc_TypeError,
1334                             "NULL object in marshal data for set");
1335                     Py_DECREF(v);
1336                     v = NULL;
1337                     break;
1338                 }
1339                 if (PySet_Add(v, v2) == -1) {
1340                     Py_DECREF(v);
1341                     Py_DECREF(v2);
1342                     v = NULL;
1343                     break;
1344                 }
1345                 Py_DECREF(v2);
1346             }
1347             if (type != TYPE_SET)
1348                 v = r_ref_insert(v, idx, flag, p);
1349             retval = v;
1350         }
1351         break;
1352 
1353     case TYPE_CODE:
1354         {
1355             int argcount;
1356             int posonlyargcount;
1357             int kwonlyargcount;
1358             int stacksize;
1359             int flags;
1360             PyObject *code = NULL;
1361             PyObject *consts = NULL;
1362             PyObject *names = NULL;
1363             PyObject *localsplusnames = NULL;
1364             PyObject *localspluskinds = NULL;
1365             PyObject *filename = NULL;
1366             PyObject *name = NULL;
1367             PyObject *qualname = NULL;
1368             int firstlineno;
1369             PyObject* linetable = NULL;
1370             PyObject *exceptiontable = NULL;
1371 
1372             idx = r_ref_reserve(flag, p);
1373             if (idx < 0)
1374                 break;
1375 
1376             v = NULL;
1377 
1378             /* XXX ignore long->int overflows for now */
1379             argcount = (int)r_long(p);
1380             if (PyErr_Occurred())
1381                 goto code_error;
1382             posonlyargcount = (int)r_long(p);
1383             if (PyErr_Occurred()) {
1384                 goto code_error;
1385             }
1386             kwonlyargcount = (int)r_long(p);
1387             if (PyErr_Occurred())
1388                 goto code_error;
1389             stacksize = (int)r_long(p);
1390             if (PyErr_Occurred())
1391                 goto code_error;
1392             flags = (int)r_long(p);
1393             if (PyErr_Occurred())
1394                 goto code_error;
1395             code = r_object(p);
1396             if (code == NULL)
1397                 goto code_error;
1398             consts = r_object(p);
1399             if (consts == NULL)
1400                 goto code_error;
1401             names = r_object(p);
1402             if (names == NULL)
1403                 goto code_error;
1404             localsplusnames = r_object(p);
1405             if (localsplusnames == NULL)
1406                 goto code_error;
1407             localspluskinds = r_object(p);
1408             if (localspluskinds == NULL)
1409                 goto code_error;
1410             filename = r_object(p);
1411             if (filename == NULL)
1412                 goto code_error;
1413             name = r_object(p);
1414             if (name == NULL)
1415                 goto code_error;
1416             qualname = r_object(p);
1417             if (qualname == NULL)
1418                 goto code_error;
1419             firstlineno = (int)r_long(p);
1420             if (firstlineno == -1 && PyErr_Occurred())
1421                 break;
1422             linetable = r_object(p);
1423             if (linetable == NULL)
1424                 goto code_error;
1425             exceptiontable = r_object(p);
1426             if (exceptiontable == NULL)
1427                 goto code_error;
1428 
1429             struct _PyCodeConstructor con = {
1430                 .filename = filename,
1431                 .name = name,
1432                 .qualname = qualname,
1433                 .flags = flags,
1434 
1435                 .code = code,
1436                 .firstlineno = firstlineno,
1437                 .linetable = linetable,
1438 
1439                 .consts = consts,
1440                 .names = names,
1441 
1442                 .localsplusnames = localsplusnames,
1443                 .localspluskinds = localspluskinds,
1444 
1445                 .argcount = argcount,
1446                 .posonlyargcount = posonlyargcount,
1447                 .kwonlyargcount = kwonlyargcount,
1448 
1449                 .stacksize = stacksize,
1450 
1451                 .exceptiontable = exceptiontable,
1452             };
1453 
1454             if (_PyCode_Validate(&con) < 0) {
1455                 goto code_error;
1456             }
1457 
1458             v = (PyObject *)_PyCode_New(&con);
1459             if (v == NULL) {
1460                 goto code_error;
1461             }
1462 
1463             v = r_ref_insert(v, idx, flag, p);
1464 
1465           code_error:
1466             Py_XDECREF(code);
1467             Py_XDECREF(consts);
1468             Py_XDECREF(names);
1469             Py_XDECREF(localsplusnames);
1470             Py_XDECREF(localspluskinds);
1471             Py_XDECREF(filename);
1472             Py_XDECREF(name);
1473             Py_XDECREF(qualname);
1474             Py_XDECREF(linetable);
1475             Py_XDECREF(exceptiontable);
1476         }
1477         retval = v;
1478         break;
1479 
1480     case TYPE_REF:
1481         n = r_long(p);
1482         if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
1483             if (n == -1 && PyErr_Occurred())
1484                 break;
1485             PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1486             break;
1487         }
1488         v = PyList_GET_ITEM(p->refs, n);
1489         if (v == Py_None) {
1490             PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1491             break;
1492         }
1493         Py_INCREF(v);
1494         retval = v;
1495         break;
1496 
1497     default:
1498         /* Bogus data got written, which isn't ideal.
1499            This will let you keep working and recover. */
1500         PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1501         break;
1502 
1503     }
1504     p->depth--;
1505     return retval;
1506 }
1507 
1508 static PyObject *
read_object(RFILE * p)1509 read_object(RFILE *p)
1510 {
1511     PyObject *v;
1512     if (PyErr_Occurred()) {
1513         fprintf(stderr, "XXX readobject called with exception set\n");
1514         return NULL;
1515     }
1516     if (p->ptr && p->end) {
1517         if (PySys_Audit("marshal.loads", "y#", p->ptr, (Py_ssize_t)(p->end - p->ptr)) < 0) {
1518             return NULL;
1519         }
1520     } else if (p->fp || p->readable) {
1521         if (PySys_Audit("marshal.load", NULL) < 0) {
1522             return NULL;
1523         }
1524     }
1525     v = r_object(p);
1526     if (v == NULL && !PyErr_Occurred())
1527         PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1528     return v;
1529 }
1530 
1531 int
PyMarshal_ReadShortFromFile(FILE * fp)1532 PyMarshal_ReadShortFromFile(FILE *fp)
1533 {
1534     RFILE rf;
1535     int res;
1536     assert(fp);
1537     rf.readable = NULL;
1538     rf.fp = fp;
1539     rf.end = rf.ptr = NULL;
1540     rf.buf = NULL;
1541     res = r_short(&rf);
1542     if (rf.buf != NULL)
1543         PyMem_Free(rf.buf);
1544     return res;
1545 }
1546 
1547 long
PyMarshal_ReadLongFromFile(FILE * fp)1548 PyMarshal_ReadLongFromFile(FILE *fp)
1549 {
1550     RFILE rf;
1551     long res;
1552     rf.fp = fp;
1553     rf.readable = NULL;
1554     rf.ptr = rf.end = NULL;
1555     rf.buf = NULL;
1556     res = r_long(&rf);
1557     if (rf.buf != NULL)
1558         PyMem_Free(rf.buf);
1559     return res;
1560 }
1561 
1562 /* Return size of file in bytes; < 0 if unknown or INT_MAX if too big */
1563 static off_t
getfilesize(FILE * fp)1564 getfilesize(FILE *fp)
1565 {
1566     struct _Py_stat_struct st;
1567     if (_Py_fstat_noraise(fileno(fp), &st) != 0)
1568         return -1;
1569 #if SIZEOF_OFF_T == 4
1570     else if (st.st_size >= INT_MAX)
1571         return (off_t)INT_MAX;
1572 #endif
1573     else
1574         return (off_t)st.st_size;
1575 }
1576 
1577 /* If we can get the size of the file up-front, and it's reasonably small,
1578  * read it in one gulp and delegate to ...FromString() instead.  Much quicker
1579  * than reading a byte at a time from file; speeds .pyc imports.
1580  * CAUTION:  since this may read the entire remainder of the file, don't
1581  * call it unless you know you're done with the file.
1582  */
1583 PyObject *
PyMarshal_ReadLastObjectFromFile(FILE * fp)1584 PyMarshal_ReadLastObjectFromFile(FILE *fp)
1585 {
1586 /* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
1587 #define REASONABLE_FILE_LIMIT (1L << 18)
1588     off_t filesize;
1589     filesize = getfilesize(fp);
1590     if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
1591         char* pBuf = (char *)PyMem_Malloc(filesize);
1592         if (pBuf != NULL) {
1593             size_t n = fread(pBuf, 1, (size_t)filesize, fp);
1594             PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
1595             PyMem_Free(pBuf);
1596             return v;
1597         }
1598 
1599     }
1600     /* We don't have fstat, or we do but the file is larger than
1601      * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1602      */
1603     return PyMarshal_ReadObjectFromFile(fp);
1604 
1605 #undef REASONABLE_FILE_LIMIT
1606 }
1607 
1608 PyObject *
PyMarshal_ReadObjectFromFile(FILE * fp)1609 PyMarshal_ReadObjectFromFile(FILE *fp)
1610 {
1611     RFILE rf;
1612     PyObject *result;
1613     rf.fp = fp;
1614     rf.readable = NULL;
1615     rf.depth = 0;
1616     rf.ptr = rf.end = NULL;
1617     rf.buf = NULL;
1618     rf.refs = PyList_New(0);
1619     if (rf.refs == NULL)
1620         return NULL;
1621     result = read_object(&rf);
1622     Py_DECREF(rf.refs);
1623     if (rf.buf != NULL)
1624         PyMem_Free(rf.buf);
1625     return result;
1626 }
1627 
1628 PyObject *
PyMarshal_ReadObjectFromString(const char * str,Py_ssize_t len)1629 PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
1630 {
1631     RFILE rf;
1632     PyObject *result;
1633     rf.fp = NULL;
1634     rf.readable = NULL;
1635     rf.ptr = str;
1636     rf.end = str + len;
1637     rf.buf = NULL;
1638     rf.depth = 0;
1639     rf.refs = PyList_New(0);
1640     if (rf.refs == NULL)
1641         return NULL;
1642     result = read_object(&rf);
1643     Py_DECREF(rf.refs);
1644     if (rf.buf != NULL)
1645         PyMem_Free(rf.buf);
1646     return result;
1647 }
1648 
1649 PyObject *
PyMarshal_WriteObjectToString(PyObject * x,int version)1650 PyMarshal_WriteObjectToString(PyObject *x, int version)
1651 {
1652     WFILE wf;
1653 
1654     if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
1655         return NULL;
1656     }
1657     memset(&wf, 0, sizeof(wf));
1658     wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
1659     if (wf.str == NULL)
1660         return NULL;
1661     wf.ptr = wf.buf = PyBytes_AS_STRING(wf.str);
1662     wf.end = wf.ptr + PyBytes_GET_SIZE(wf.str);
1663     wf.error = WFERR_OK;
1664     wf.version = version;
1665     if (w_init_refs(&wf, version)) {
1666         Py_DECREF(wf.str);
1667         return NULL;
1668     }
1669     w_object(x, &wf);
1670     w_clear_refs(&wf);
1671     if (wf.str != NULL) {
1672         const char *base = PyBytes_AS_STRING(wf.str);
1673         if (_PyBytes_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base)) < 0)
1674             return NULL;
1675     }
1676     if (wf.error != WFERR_OK) {
1677         Py_XDECREF(wf.str);
1678         if (wf.error == WFERR_NOMEMORY)
1679             PyErr_NoMemory();
1680         else
1681             PyErr_SetString(PyExc_ValueError,
1682               (wf.error==WFERR_UNMARSHALLABLE)?"unmarshallable object"
1683                :"object too deeply nested to marshal");
1684         return NULL;
1685     }
1686     return wf.str;
1687 }
1688 
1689 /* And an interface for Python programs... */
1690 /*[clinic input]
1691 marshal.dump
1692 
1693     value: object
1694         Must be a supported type.
1695     file: object
1696         Must be a writeable binary file.
1697     version: int(c_default="Py_MARSHAL_VERSION") = version
1698         Indicates the data format that dump should use.
1699     /
1700 
1701 Write the value on the open file.
1702 
1703 If the value has (or contains an object that has) an unsupported type, a
1704 ValueError exception is raised - but garbage data will also be written
1705 to the file. The object will not be properly read back by load().
1706 [clinic start generated code]*/
1707 
1708 static PyObject *
marshal_dump_impl(PyObject * module,PyObject * value,PyObject * file,int version)1709 marshal_dump_impl(PyObject *module, PyObject *value, PyObject *file,
1710                   int version)
1711 /*[clinic end generated code: output=aaee62c7028a7cb2 input=6c7a3c23c6fef556]*/
1712 {
1713     /* XXX Quick hack -- need to do this differently */
1714     PyObject *s;
1715     PyObject *res;
1716 
1717     s = PyMarshal_WriteObjectToString(value, version);
1718     if (s == NULL)
1719         return NULL;
1720     res = _PyObject_CallMethodOneArg(file, &_Py_ID(write), s);
1721     Py_DECREF(s);
1722     return res;
1723 }
1724 
1725 /*[clinic input]
1726 marshal.load
1727 
1728     file: object
1729         Must be readable binary file.
1730     /
1731 
1732 Read one value from the open file and return it.
1733 
1734 If no valid value is read (e.g. because the data has a different Python
1735 version's incompatible marshal format), raise EOFError, ValueError or
1736 TypeError.
1737 
1738 Note: If an object containing an unsupported type was marshalled with
1739 dump(), load() will substitute None for the unmarshallable type.
1740 [clinic start generated code]*/
1741 
1742 static PyObject *
marshal_load(PyObject * module,PyObject * file)1743 marshal_load(PyObject *module, PyObject *file)
1744 /*[clinic end generated code: output=f8e5c33233566344 input=c85c2b594cd8124a]*/
1745 {
1746     PyObject *data, *result;
1747     RFILE rf;
1748 
1749     /*
1750      * Make a call to the read method, but read zero bytes.
1751      * This is to ensure that the object passed in at least
1752      * has a read method which returns bytes.
1753      * This can be removed if we guarantee good error handling
1754      * for r_string()
1755      */
1756     data = _PyObject_CallMethod(file, &_Py_ID(read), "i", 0);
1757     if (data == NULL)
1758         return NULL;
1759     if (!PyBytes_Check(data)) {
1760         PyErr_Format(PyExc_TypeError,
1761                      "file.read() returned not bytes but %.100s",
1762                      Py_TYPE(data)->tp_name);
1763         result = NULL;
1764     }
1765     else {
1766         rf.depth = 0;
1767         rf.fp = NULL;
1768         rf.readable = file;
1769         rf.ptr = rf.end = NULL;
1770         rf.buf = NULL;
1771         if ((rf.refs = PyList_New(0)) != NULL) {
1772             result = read_object(&rf);
1773             Py_DECREF(rf.refs);
1774             if (rf.buf != NULL)
1775                 PyMem_Free(rf.buf);
1776         } else
1777             result = NULL;
1778     }
1779     Py_DECREF(data);
1780     return result;
1781 }
1782 
1783 /*[clinic input]
1784 marshal.dumps
1785 
1786     value: object
1787         Must be a supported type.
1788     version: int(c_default="Py_MARSHAL_VERSION") = version
1789         Indicates the data format that dumps should use.
1790     /
1791 
1792 Return the bytes object that would be written to a file by dump(value, file).
1793 
1794 Raise a ValueError exception if value has (or contains an object that has) an
1795 unsupported type.
1796 [clinic start generated code]*/
1797 
1798 static PyObject *
marshal_dumps_impl(PyObject * module,PyObject * value,int version)1799 marshal_dumps_impl(PyObject *module, PyObject *value, int version)
1800 /*[clinic end generated code: output=9c200f98d7256cad input=a2139ea8608e9b27]*/
1801 {
1802     return PyMarshal_WriteObjectToString(value, version);
1803 }
1804 
1805 /*[clinic input]
1806 marshal.loads
1807 
1808     bytes: Py_buffer
1809     /
1810 
1811 Convert the bytes-like object to a value.
1812 
1813 If no valid value is found, raise EOFError, ValueError or TypeError.  Extra
1814 bytes in the input are ignored.
1815 [clinic start generated code]*/
1816 
1817 static PyObject *
marshal_loads_impl(PyObject * module,Py_buffer * bytes)1818 marshal_loads_impl(PyObject *module, Py_buffer *bytes)
1819 /*[clinic end generated code: output=9fc65985c93d1bb1 input=6f426518459c8495]*/
1820 {
1821     RFILE rf;
1822     char *s = bytes->buf;
1823     Py_ssize_t n = bytes->len;
1824     PyObject* result;
1825     rf.fp = NULL;
1826     rf.readable = NULL;
1827     rf.ptr = s;
1828     rf.end = s + n;
1829     rf.depth = 0;
1830     if ((rf.refs = PyList_New(0)) == NULL)
1831         return NULL;
1832     result = read_object(&rf);
1833     Py_DECREF(rf.refs);
1834     return result;
1835 }
1836 
1837 static PyMethodDef marshal_methods[] = {
1838     MARSHAL_DUMP_METHODDEF
1839     MARSHAL_LOAD_METHODDEF
1840     MARSHAL_DUMPS_METHODDEF
1841     MARSHAL_LOADS_METHODDEF
1842     {NULL,              NULL}           /* sentinel */
1843 };
1844 
1845 
1846 PyDoc_STRVAR(module_doc,
1847 "This module contains functions that can read and write Python values in\n\
1848 a binary format. The format is specific to Python, but independent of\n\
1849 machine architecture issues.\n\
1850 \n\
1851 Not all Python object types are supported; in general, only objects\n\
1852 whose value is independent from a particular invocation of Python can be\n\
1853 written and read by this module. The following types are supported:\n\
1854 None, integers, floating point numbers, strings, bytes, bytearrays,\n\
1855 tuples, lists, sets, dictionaries, and code objects, where it\n\
1856 should be understood that tuples, lists and dictionaries are only\n\
1857 supported as long as the values contained therein are themselves\n\
1858 supported; and recursive lists and dictionaries should not be written\n\
1859 (they will cause infinite loops).\n\
1860 \n\
1861 Variables:\n\
1862 \n\
1863 version -- indicates the format that the module uses. Version 0 is the\n\
1864     historical format, version 1 shares interned strings and version 2\n\
1865     uses a binary format for floating point numbers.\n\
1866     Version 3 shares common object references (New in version 3.4).\n\
1867 \n\
1868 Functions:\n\
1869 \n\
1870 dump() -- write value to a file\n\
1871 load() -- read value from a file\n\
1872 dumps() -- marshal value as a bytes object\n\
1873 loads() -- read value from a bytes-like object");
1874 
1875 
1876 static int
marshal_module_exec(PyObject * mod)1877 marshal_module_exec(PyObject *mod)
1878 {
1879     if (PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION) < 0) {
1880         return -1;
1881     }
1882     return 0;
1883 }
1884 
1885 static PyModuleDef_Slot marshalmodule_slots[] = {
1886     {Py_mod_exec, marshal_module_exec},
1887     {0, NULL}
1888 };
1889 
1890 static struct PyModuleDef marshalmodule = {
1891     PyModuleDef_HEAD_INIT,
1892     .m_name = "marshal",
1893     .m_doc = module_doc,
1894     .m_methods = marshal_methods,
1895     .m_slots = marshalmodule_slots,
1896 };
1897 
1898 PyMODINIT_FUNC
PyMarshal_Init(void)1899 PyMarshal_Init(void)
1900 {
1901     return PyModuleDef_Init(&marshalmodule);
1902 }
1903