1 /* JSON accelerator C extensor: _json module.
2 *
3 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
4 * and as an extension module (Py_BUILD_CORE_MODULE define) on other
5 * platforms. */
6
7 #ifndef Py_BUILD_CORE_BUILTIN
8 # define Py_BUILD_CORE_MODULE 1
9 #endif
10 #define NEEDS_PY_IDENTIFIER
11
12 #include "Python.h"
13 #include "pycore_ceval.h" // _Py_EnterRecursiveCall()
14 #include "structmember.h" // PyMemberDef
15 #include "pycore_accu.h"
16
/* Per-module state for _json, retrieved with get_json_state().
   NOTE(review): the two slots presumably hold the scanner and encoder type
   objects; they are filled in outside this part of the file -- confirm. */
typedef struct {
    PyObject *PyScannerType;
    PyObject *PyEncoderType;
} _jsonmodulestate;
21
22 static inline _jsonmodulestate*
get_json_state(PyObject * module)23 get_json_state(PyObject *module)
24 {
25 void *state = PyModule_GetState(module);
26 assert(state != NULL);
27 return (_jsonmodulestate *)state;
28 }
29
30
/* Instance layout of the scanner object.
   Every PyObject* field is visited by scanner_traverse() and released by
   scanner_clear().  All fields except `memo` are exposed read-only through
   scanner_members. */
typedef struct _PyScannerObject {
    PyObject_HEAD
    signed char strict;           /* passed to scanstring_unicode() as `strict` */
    PyObject *object_hook;        /* called with the parsed dict unless it is Py_None */
    PyObject *object_pairs_hook;  /* if not Py_None, called with a list of (key, value) tuples */
    PyObject *parse_float;        /* used for floats unless it is exactly PyFloat_Type */
    PyObject *parse_int;          /* used for ints unless it is exactly PyLong_Type */
    PyObject *parse_constant;     /* called by _parse_constant() with the constant's name */
    PyObject *memo;               /* dict used via PyDict_SetDefault() to share object keys */
} PyScannerObject;
41
42 static PyMemberDef scanner_members[] = {
43 {"strict", T_BOOL, offsetof(PyScannerObject, strict), READONLY, "strict"},
44 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
45 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
46 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
47 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
48 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
49 {NULL}
50 };
51
/* Instance layout of the encoder object.
   The PyObject* fields and the two char flags are exposed read-only through
   encoder_members; `allow_nan` and `fast_encode` have no entry there. */
typedef struct _PyEncoderObject {
    PyObject_HEAD
    PyObject *markers;
    PyObject *defaultfn;      /* exposed to Python under the name "default" */
    PyObject *encoder;
    PyObject *indent;
    PyObject *key_separator;
    PyObject *item_separator;
    char sort_keys;           /* exposed as T_BOOL */
    char skipkeys;            /* exposed as T_BOOL */
    int allow_nan;
    PyCFunction fast_encode;  /* NOTE(review): presumably a C fast path for `encoder` -- confirm where it is set */
} PyEncoderObject;
65
/* Read-only attributes exposed on encoder instances.
   The C field `defaultfn` is published under the Python name "default". */
static PyMemberDef encoder_members[] = {
    {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
    {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
    {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
    {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
    {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
    {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
    {"sort_keys", T_BOOL, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
    {"skipkeys", T_BOOL, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
    {NULL}  /* sentinel */
};
77
/* Forward decls */

/* String escaping */
static PyObject *
ascii_escape_unicode(PyObject *pystr);
static PyObject *
py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr);

/* Scanner (decoder) */
static PyObject *
scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
static PyObject *
_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
static PyObject *
scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
static void
scanner_dealloc(PyObject *self);
static int
scanner_clear(PyScannerObject *self);

/* Encoder */
static PyObject *
encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
static void
encoder_dealloc(PyObject *self);
static int
encoder_clear(PyEncoderObject *self);
static int
encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc, PyObject *seq, Py_ssize_t indent_level);
static int
encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc, PyObject *obj, Py_ssize_t indent_level);
static int
encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc, PyObject *dct, Py_ssize_t indent_level);
static PyObject *
_encoded_const(PyObject *obj);

/* Error reporting (raises json.decoder.JSONDecodeError) */
static void
raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);

/* Encoder primitives */
static PyObject *
encoder_encode_string(PyEncoderObject *s, PyObject *obj);
static PyObject *
encoder_encode_float(PyEncoderObject *s, PyObject *obj);
114
/* True when c may be copied verbatim into an ASCII-only JSON string:
   printable ASCII (' ' .. '~') other than backslash and double quote.
   The argument is fully parenthesized so that compound expressions such as
   `cond ? a : b` are classified correctly.
   NOTE: c is evaluated more than once; pass a side-effect-free expression. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* True for the four whitespace characters JSON allows between tokens. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
117
118 static Py_ssize_t
ascii_escape_unichar(Py_UCS4 c,unsigned char * output,Py_ssize_t chars)119 ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
120 {
121 /* Escape unicode code point c to ASCII escape sequences
122 in char *output. output must have at least 12 bytes unused to
123 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
124 output[chars++] = '\\';
125 switch (c) {
126 case '\\': output[chars++] = c; break;
127 case '"': output[chars++] = c; break;
128 case '\b': output[chars++] = 'b'; break;
129 case '\f': output[chars++] = 'f'; break;
130 case '\n': output[chars++] = 'n'; break;
131 case '\r': output[chars++] = 'r'; break;
132 case '\t': output[chars++] = 't'; break;
133 default:
134 if (c >= 0x10000) {
135 /* UTF-16 surrogate pair */
136 Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
137 output[chars++] = 'u';
138 output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
139 output[chars++] = Py_hexdigits[(v >> 8) & 0xf];
140 output[chars++] = Py_hexdigits[(v >> 4) & 0xf];
141 output[chars++] = Py_hexdigits[(v ) & 0xf];
142 c = Py_UNICODE_LOW_SURROGATE(c);
143 output[chars++] = '\\';
144 }
145 output[chars++] = 'u';
146 output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
147 output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
148 output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
149 output[chars++] = Py_hexdigits[(c ) & 0xf];
150 }
151 return chars;
152 }
153
154 static PyObject *
ascii_escape_unicode(PyObject * pystr)155 ascii_escape_unicode(PyObject *pystr)
156 {
157 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
158 Py_ssize_t i;
159 Py_ssize_t input_chars;
160 Py_ssize_t output_size;
161 Py_ssize_t chars;
162 PyObject *rval;
163 const void *input;
164 Py_UCS1 *output;
165 int kind;
166
167 if (PyUnicode_READY(pystr) == -1)
168 return NULL;
169
170 input_chars = PyUnicode_GET_LENGTH(pystr);
171 input = PyUnicode_DATA(pystr);
172 kind = PyUnicode_KIND(pystr);
173
174 /* Compute the output size */
175 for (i = 0, output_size = 2; i < input_chars; i++) {
176 Py_UCS4 c = PyUnicode_READ(kind, input, i);
177 Py_ssize_t d;
178 if (S_CHAR(c)) {
179 d = 1;
180 }
181 else {
182 switch(c) {
183 case '\\': case '"': case '\b': case '\f':
184 case '\n': case '\r': case '\t':
185 d = 2; break;
186 default:
187 d = c >= 0x10000 ? 12 : 6;
188 }
189 }
190 if (output_size > PY_SSIZE_T_MAX - d) {
191 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
192 return NULL;
193 }
194 output_size += d;
195 }
196
197 rval = PyUnicode_New(output_size, 127);
198 if (rval == NULL) {
199 return NULL;
200 }
201 output = PyUnicode_1BYTE_DATA(rval);
202 chars = 0;
203 output[chars++] = '"';
204 for (i = 0; i < input_chars; i++) {
205 Py_UCS4 c = PyUnicode_READ(kind, input, i);
206 if (S_CHAR(c)) {
207 output[chars++] = c;
208 }
209 else {
210 chars = ascii_escape_unichar(c, output, chars);
211 }
212 }
213 output[chars++] = '"';
214 #ifdef Py_DEBUG
215 assert(_PyUnicode_CheckConsistency(rval, 1));
216 #endif
217 return rval;
218 }
219
220 static PyObject *
escape_unicode(PyObject * pystr)221 escape_unicode(PyObject *pystr)
222 {
223 /* Take a PyUnicode pystr and return a new escaped PyUnicode */
224 Py_ssize_t i;
225 Py_ssize_t input_chars;
226 Py_ssize_t output_size;
227 Py_ssize_t chars;
228 PyObject *rval;
229 const void *input;
230 int kind;
231 Py_UCS4 maxchar;
232
233 if (PyUnicode_READY(pystr) == -1)
234 return NULL;
235
236 maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
237 input_chars = PyUnicode_GET_LENGTH(pystr);
238 input = PyUnicode_DATA(pystr);
239 kind = PyUnicode_KIND(pystr);
240
241 /* Compute the output size */
242 for (i = 0, output_size = 2; i < input_chars; i++) {
243 Py_UCS4 c = PyUnicode_READ(kind, input, i);
244 Py_ssize_t d;
245 switch (c) {
246 case '\\': case '"': case '\b': case '\f':
247 case '\n': case '\r': case '\t':
248 d = 2;
249 break;
250 default:
251 if (c <= 0x1f)
252 d = 6;
253 else
254 d = 1;
255 }
256 if (output_size > PY_SSIZE_T_MAX - d) {
257 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
258 return NULL;
259 }
260 output_size += d;
261 }
262
263 rval = PyUnicode_New(output_size, maxchar);
264 if (rval == NULL)
265 return NULL;
266
267 kind = PyUnicode_KIND(rval);
268
269 #define ENCODE_OUTPUT do { \
270 chars = 0; \
271 output[chars++] = '"'; \
272 for (i = 0; i < input_chars; i++) { \
273 Py_UCS4 c = PyUnicode_READ(kind, input, i); \
274 switch (c) { \
275 case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
276 case '"': output[chars++] = '\\'; output[chars++] = c; break; \
277 case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
278 case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
279 case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
280 case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
281 case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
282 default: \
283 if (c <= 0x1f) { \
284 output[chars++] = '\\'; \
285 output[chars++] = 'u'; \
286 output[chars++] = '0'; \
287 output[chars++] = '0'; \
288 output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
289 output[chars++] = Py_hexdigits[(c ) & 0xf]; \
290 } else { \
291 output[chars++] = c; \
292 } \
293 } \
294 } \
295 output[chars++] = '"'; \
296 } while (0)
297
298 if (kind == PyUnicode_1BYTE_KIND) {
299 Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
300 ENCODE_OUTPUT;
301 } else if (kind == PyUnicode_2BYTE_KIND) {
302 Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
303 ENCODE_OUTPUT;
304 } else {
305 Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
306 assert(kind == PyUnicode_4BYTE_KIND);
307 ENCODE_OUTPUT;
308 }
309 #undef ENCODE_OUTPUT
310
311 #ifdef Py_DEBUG
312 assert(_PyUnicode_CheckConsistency(rval, 1));
313 #endif
314 return rval;
315 }
316
317 static void
raise_errmsg(const char * msg,PyObject * s,Py_ssize_t end)318 raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
319 {
320 /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
321 _Py_static_string(PyId_decoder, "json.decoder");
322 PyObject *decoder = _PyImport_GetModuleId(&PyId_decoder);
323 if (decoder == NULL) {
324 return;
325 }
326
327 _Py_IDENTIFIER(JSONDecodeError);
328 PyObject *JSONDecodeError = _PyObject_GetAttrId(decoder, &PyId_JSONDecodeError);
329 Py_DECREF(decoder);
330 if (JSONDecodeError == NULL) {
331 return;
332 }
333
334 PyObject *exc;
335 exc = PyObject_CallFunction(JSONDecodeError, "zOn", msg, s, end);
336 Py_DECREF(JSONDecodeError);
337 if (exc) {
338 PyErr_SetObject(JSONDecodeError, exc);
339 Py_DECREF(exc);
340 }
341 }
342
343 static void
raise_stop_iteration(Py_ssize_t idx)344 raise_stop_iteration(Py_ssize_t idx)
345 {
346 PyObject *value = PyLong_FromSsize_t(idx);
347 if (value != NULL) {
348 PyErr_SetObject(PyExc_StopIteration, value);
349 Py_DECREF(value);
350 }
351 }
352
353 static PyObject *
_build_rval_index_tuple(PyObject * rval,Py_ssize_t idx)354 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
355 /* return (rval, idx) tuple, stealing reference to rval */
356 PyObject *tpl;
357 PyObject *pyidx;
358 /*
359 steal a reference to rval, returns (rval, idx)
360 */
361 if (rval == NULL) {
362 return NULL;
363 }
364 pyidx = PyLong_FromSsize_t(idx);
365 if (pyidx == NULL) {
366 Py_DECREF(rval);
367 return NULL;
368 }
369 tpl = PyTuple_New(2);
370 if (tpl == NULL) {
371 Py_DECREF(pyidx);
372 Py_DECREF(rval);
373 return NULL;
374 }
375 PyTuple_SET_ITEM(tpl, 0, rval);
376 PyTuple_SET_ITEM(tpl, 1, pyidx);
377 return tpl;
378 }
379
380 static PyObject *
scanstring_unicode(PyObject * pystr,Py_ssize_t end,int strict,Py_ssize_t * next_end_ptr)381 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
382 {
383 /* Read the JSON string from PyUnicode pystr.
384 end is the index of the first character after the quote.
385 if strict is zero then literal control characters are allowed
386 *next_end_ptr is a return-by-reference index of the character
387 after the end quote
388
389 Return value is a new PyUnicode
390 */
391 PyObject *rval = NULL;
392 Py_ssize_t len;
393 Py_ssize_t begin = end - 1;
394 Py_ssize_t next /* = begin */;
395 const void *buf;
396 int kind;
397
398 if (PyUnicode_READY(pystr) == -1)
399 return 0;
400
401 _PyUnicodeWriter writer;
402 _PyUnicodeWriter_Init(&writer);
403 writer.overallocate = 1;
404
405 len = PyUnicode_GET_LENGTH(pystr);
406 buf = PyUnicode_DATA(pystr);
407 kind = PyUnicode_KIND(pystr);
408
409 if (end < 0 || len < end) {
410 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
411 goto bail;
412 }
413 while (1) {
414 /* Find the end of the string or the next escape */
415 Py_UCS4 c;
416 {
417 // Use tight scope variable to help register allocation.
418 Py_UCS4 d = 0;
419 for (next = end; next < len; next++) {
420 d = PyUnicode_READ(kind, buf, next);
421 if (d == '"' || d == '\\') {
422 break;
423 }
424 if (d <= 0x1f && strict) {
425 raise_errmsg("Invalid control character at", pystr, next);
426 goto bail;
427 }
428 }
429 c = d;
430 }
431
432 if (c == '"') {
433 // Fast path for simple case.
434 if (writer.buffer == NULL) {
435 PyObject *ret = PyUnicode_Substring(pystr, end, next);
436 if (ret == NULL) {
437 goto bail;
438 }
439 *next_end_ptr = next + 1;;
440 return ret;
441 }
442 }
443 else if (c != '\\') {
444 raise_errmsg("Unterminated string starting at", pystr, begin);
445 goto bail;
446 }
447
448 /* Pick up this chunk if it's not zero length */
449 if (next != end) {
450 if (_PyUnicodeWriter_WriteSubstring(&writer, pystr, end, next) < 0) {
451 goto bail;
452 }
453 }
454 next++;
455 if (c == '"') {
456 end = next;
457 break;
458 }
459 if (next == len) {
460 raise_errmsg("Unterminated string starting at", pystr, begin);
461 goto bail;
462 }
463 c = PyUnicode_READ(kind, buf, next);
464 if (c != 'u') {
465 /* Non-unicode backslash escapes */
466 end = next + 1;
467 switch (c) {
468 case '"': break;
469 case '\\': break;
470 case '/': break;
471 case 'b': c = '\b'; break;
472 case 'f': c = '\f'; break;
473 case 'n': c = '\n'; break;
474 case 'r': c = '\r'; break;
475 case 't': c = '\t'; break;
476 default: c = 0;
477 }
478 if (c == 0) {
479 raise_errmsg("Invalid \\escape", pystr, end - 2);
480 goto bail;
481 }
482 }
483 else {
484 c = 0;
485 next++;
486 end = next + 4;
487 if (end >= len) {
488 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
489 goto bail;
490 }
491 /* Decode 4 hex digits */
492 for (; next < end; next++) {
493 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
494 c <<= 4;
495 switch (digit) {
496 case '0': case '1': case '2': case '3': case '4':
497 case '5': case '6': case '7': case '8': case '9':
498 c |= (digit - '0'); break;
499 case 'a': case 'b': case 'c': case 'd': case 'e':
500 case 'f':
501 c |= (digit - 'a' + 10); break;
502 case 'A': case 'B': case 'C': case 'D': case 'E':
503 case 'F':
504 c |= (digit - 'A' + 10); break;
505 default:
506 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
507 goto bail;
508 }
509 }
510 /* Surrogate pair */
511 if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
512 PyUnicode_READ(kind, buf, next++) == '\\' &&
513 PyUnicode_READ(kind, buf, next++) == 'u') {
514 Py_UCS4 c2 = 0;
515 end += 6;
516 /* Decode 4 hex digits */
517 for (; next < end; next++) {
518 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
519 c2 <<= 4;
520 switch (digit) {
521 case '0': case '1': case '2': case '3': case '4':
522 case '5': case '6': case '7': case '8': case '9':
523 c2 |= (digit - '0'); break;
524 case 'a': case 'b': case 'c': case 'd': case 'e':
525 case 'f':
526 c2 |= (digit - 'a' + 10); break;
527 case 'A': case 'B': case 'C': case 'D': case 'E':
528 case 'F':
529 c2 |= (digit - 'A' + 10); break;
530 default:
531 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
532 goto bail;
533 }
534 }
535 if (Py_UNICODE_IS_LOW_SURROGATE(c2))
536 c = Py_UNICODE_JOIN_SURROGATES(c, c2);
537 else
538 end -= 6;
539 }
540 }
541 if (_PyUnicodeWriter_WriteChar(&writer, c) < 0) {
542 goto bail;
543 }
544 }
545
546 rval = _PyUnicodeWriter_Finish(&writer);
547 *next_end_ptr = end;
548 return rval;
549
550 bail:
551 *next_end_ptr = -1;
552 _PyUnicodeWriter_Dealloc(&writer);
553 return NULL;
554 }
555
556 PyDoc_STRVAR(pydoc_scanstring,
557 "scanstring(string, end, strict=True) -> (string, end)\n"
558 "\n"
559 "Scan the string s for a JSON string. End is the index of the\n"
560 "character in s after the quote that started the JSON string.\n"
561 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
562 "on attempt to decode an invalid string. If strict is False then literal\n"
563 "control characters are allowed in the string.\n"
564 "\n"
565 "Returns a tuple of the decoded string and the index of the character in s\n"
566 "after the end quote."
567 );
568
569 static PyObject *
py_scanstring(PyObject * Py_UNUSED (self),PyObject * args)570 py_scanstring(PyObject* Py_UNUSED(self), PyObject *args)
571 {
572 PyObject *pystr;
573 PyObject *rval;
574 Py_ssize_t end;
575 Py_ssize_t next_end = -1;
576 int strict = 1;
577 if (!PyArg_ParseTuple(args, "On|i:scanstring", &pystr, &end, &strict)) {
578 return NULL;
579 }
580 if (PyUnicode_Check(pystr)) {
581 rval = scanstring_unicode(pystr, end, strict, &next_end);
582 }
583 else {
584 PyErr_Format(PyExc_TypeError,
585 "first argument must be a string, not %.80s",
586 Py_TYPE(pystr)->tp_name);
587 return NULL;
588 }
589 return _build_rval_index_tuple(rval, next_end);
590 }
591
592 PyDoc_STRVAR(pydoc_encode_basestring_ascii,
593 "encode_basestring_ascii(string) -> string\n"
594 "\n"
595 "Return an ASCII-only JSON representation of a Python string"
596 );
597
598 static PyObject *
py_encode_basestring_ascii(PyObject * Py_UNUSED (self),PyObject * pystr)599 py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr)
600 {
601 PyObject *rval;
602 /* Return an ASCII-only JSON representation of a Python string */
603 /* METH_O */
604 if (PyUnicode_Check(pystr)) {
605 rval = ascii_escape_unicode(pystr);
606 }
607 else {
608 PyErr_Format(PyExc_TypeError,
609 "first argument must be a string, not %.80s",
610 Py_TYPE(pystr)->tp_name);
611 return NULL;
612 }
613 return rval;
614 }
615
616
617 PyDoc_STRVAR(pydoc_encode_basestring,
618 "encode_basestring(string) -> string\n"
619 "\n"
620 "Return a JSON representation of a Python string"
621 );
622
623 static PyObject *
py_encode_basestring(PyObject * Py_UNUSED (self),PyObject * pystr)624 py_encode_basestring(PyObject* Py_UNUSED(self), PyObject *pystr)
625 {
626 PyObject *rval;
627 /* Return a JSON representation of a Python string */
628 /* METH_O */
629 if (PyUnicode_Check(pystr)) {
630 rval = escape_unicode(pystr);
631 }
632 else {
633 PyErr_Format(PyExc_TypeError,
634 "first argument must be a string, not %.80s",
635 Py_TYPE(pystr)->tp_name);
636 return NULL;
637 }
638 return rval;
639 }
640
641 static void
scanner_dealloc(PyObject * self)642 scanner_dealloc(PyObject *self)
643 {
644 PyTypeObject *tp = Py_TYPE(self);
645 /* bpo-31095: UnTrack is needed before calling any callbacks */
646 PyObject_GC_UnTrack(self);
647 scanner_clear((PyScannerObject *)self);
648 tp->tp_free(self);
649 Py_DECREF(tp);
650 }
651
/* GC traversal for scanner objects: visit the instance's type and every
   PyObject* field of PyScannerObject.  Returns 0 when all visits complete
   (Py_VISIT returns early from this function if the visitor reports a
   non-zero result). */
static int
scanner_traverse(PyScannerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(Py_TYPE(self));
    Py_VISIT(self->object_hook);
    Py_VISIT(self->object_pairs_hook);
    Py_VISIT(self->parse_float);
    Py_VISIT(self->parse_int);
    Py_VISIT(self->parse_constant);
    Py_VISIT(self->memo);
    return 0;
}
664
/* Release every PyObject* field of the scanner and reset it to NULL.
   Also called from scanner_dealloc().  Always returns 0. */
static int
scanner_clear(PyScannerObject *self)
{
    Py_CLEAR(self->object_hook);
    Py_CLEAR(self->object_pairs_hook);
    Py_CLEAR(self->parse_float);
    Py_CLEAR(self->parse_int);
    Py_CLEAR(self->parse_constant);
    Py_CLEAR(self->memo);
    return 0;
}
676
677 static PyObject *
_parse_object_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)678 _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
679 {
680 /* Read a JSON object from PyUnicode pystr.
681 idx is the index of the first character after the opening curly brace.
682 *next_idx_ptr is a return-by-reference index to the first character after
683 the closing curly brace.
684
685 Returns a new PyObject (usually a dict, but object_hook can change that)
686 */
687 const void *str;
688 int kind;
689 Py_ssize_t end_idx;
690 PyObject *val = NULL;
691 PyObject *rval = NULL;
692 PyObject *key = NULL;
693 int has_pairs_hook = (s->object_pairs_hook != Py_None);
694 Py_ssize_t next_idx;
695
696 if (PyUnicode_READY(pystr) == -1)
697 return NULL;
698
699 str = PyUnicode_DATA(pystr);
700 kind = PyUnicode_KIND(pystr);
701 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
702
703 if (has_pairs_hook)
704 rval = PyList_New(0);
705 else
706 rval = PyDict_New();
707 if (rval == NULL)
708 return NULL;
709
710 /* skip whitespace after { */
711 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
712
713 /* only loop if the object is non-empty */
714 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
715 while (1) {
716 PyObject *memokey;
717
718 /* read key */
719 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
720 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
721 goto bail;
722 }
723 key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx);
724 if (key == NULL)
725 goto bail;
726 memokey = PyDict_SetDefault(s->memo, key, key);
727 if (memokey == NULL) {
728 goto bail;
729 }
730 Py_INCREF(memokey);
731 Py_DECREF(key);
732 key = memokey;
733 idx = next_idx;
734
735 /* skip whitespace between key and : delimiter, read :, skip whitespace */
736 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
737 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
738 raise_errmsg("Expecting ':' delimiter", pystr, idx);
739 goto bail;
740 }
741 idx++;
742 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
743
744 /* read any JSON term */
745 val = scan_once_unicode(s, pystr, idx, &next_idx);
746 if (val == NULL)
747 goto bail;
748
749 if (has_pairs_hook) {
750 PyObject *item = PyTuple_Pack(2, key, val);
751 if (item == NULL)
752 goto bail;
753 Py_CLEAR(key);
754 Py_CLEAR(val);
755 if (PyList_Append(rval, item) == -1) {
756 Py_DECREF(item);
757 goto bail;
758 }
759 Py_DECREF(item);
760 }
761 else {
762 if (PyDict_SetItem(rval, key, val) < 0)
763 goto bail;
764 Py_CLEAR(key);
765 Py_CLEAR(val);
766 }
767 idx = next_idx;
768
769 /* skip whitespace before } or , */
770 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
771
772 /* bail if the object is closed or we didn't get the , delimiter */
773 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
774 break;
775 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
776 raise_errmsg("Expecting ',' delimiter", pystr, idx);
777 goto bail;
778 }
779 idx++;
780
781 /* skip whitespace after , delimiter */
782 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
783 }
784 }
785
786 *next_idx_ptr = idx + 1;
787
788 if (has_pairs_hook) {
789 val = PyObject_CallOneArg(s->object_pairs_hook, rval);
790 Py_DECREF(rval);
791 return val;
792 }
793
794 /* if object_hook is not None: rval = object_hook(rval) */
795 if (s->object_hook != Py_None) {
796 val = PyObject_CallOneArg(s->object_hook, rval);
797 Py_DECREF(rval);
798 return val;
799 }
800 return rval;
801 bail:
802 Py_XDECREF(key);
803 Py_XDECREF(val);
804 Py_XDECREF(rval);
805 return NULL;
806 }
807
808 static PyObject *
_parse_array_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)809 _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
810 /* Read a JSON array from PyUnicode pystr.
811 idx is the index of the first character after the opening brace.
812 *next_idx_ptr is a return-by-reference index to the first character after
813 the closing brace.
814
815 Returns a new PyList
816 */
817 const void *str;
818 int kind;
819 Py_ssize_t end_idx;
820 PyObject *val = NULL;
821 PyObject *rval;
822 Py_ssize_t next_idx;
823
824 if (PyUnicode_READY(pystr) == -1)
825 return NULL;
826
827 rval = PyList_New(0);
828 if (rval == NULL)
829 return NULL;
830
831 str = PyUnicode_DATA(pystr);
832 kind = PyUnicode_KIND(pystr);
833 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
834
835 /* skip whitespace after [ */
836 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
837
838 /* only loop if the array is non-empty */
839 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
840 while (1) {
841
842 /* read any JSON term */
843 val = scan_once_unicode(s, pystr, idx, &next_idx);
844 if (val == NULL)
845 goto bail;
846
847 if (PyList_Append(rval, val) == -1)
848 goto bail;
849
850 Py_CLEAR(val);
851 idx = next_idx;
852
853 /* skip whitespace between term and , */
854 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
855
856 /* bail if the array is closed or we didn't get the , delimiter */
857 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
858 break;
859 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
860 raise_errmsg("Expecting ',' delimiter", pystr, idx);
861 goto bail;
862 }
863 idx++;
864
865 /* skip whitespace after , */
866 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
867 }
868 }
869
870 /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
871 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
872 raise_errmsg("Expecting value", pystr, end_idx);
873 goto bail;
874 }
875 *next_idx_ptr = idx + 1;
876 return rval;
877 bail:
878 Py_XDECREF(val);
879 Py_DECREF(rval);
880 return NULL;
881 }
882
883 static PyObject *
_parse_constant(PyScannerObject * s,const char * constant,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)884 _parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
885 /* Read a JSON constant.
886 constant is the constant string that was found
887 ("NaN", "Infinity", "-Infinity").
888 idx is the index of the first character of the constant
889 *next_idx_ptr is a return-by-reference index to the first character after
890 the constant.
891
892 Returns the result of parse_constant
893 */
894 PyObject *cstr;
895 PyObject *rval;
896 /* constant is "NaN", "Infinity", or "-Infinity" */
897 cstr = PyUnicode_InternFromString(constant);
898 if (cstr == NULL)
899 return NULL;
900
901 /* rval = parse_constant(constant) */
902 rval = PyObject_CallOneArg(s->parse_constant, cstr);
903 idx += PyUnicode_GET_LENGTH(cstr);
904 Py_DECREF(cstr);
905 *next_idx_ptr = idx;
906 return rval;
907 }
908
909 static PyObject *
_match_number_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t start,Py_ssize_t * next_idx_ptr)910 _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
911 /* Read a JSON number from PyUnicode pystr.
912 idx is the index of the first character of the number
913 *next_idx_ptr is a return-by-reference index to the first character after
914 the number.
915
916 Returns a new PyObject representation of that number:
917 PyLong, or PyFloat.
918 May return other types if parse_int or parse_float are set
919 */
920 const void *str;
921 int kind;
922 Py_ssize_t end_idx;
923 Py_ssize_t idx = start;
924 int is_float = 0;
925 PyObject *rval;
926 PyObject *numstr = NULL;
927 PyObject *custom_func;
928
929 if (PyUnicode_READY(pystr) == -1)
930 return NULL;
931
932 str = PyUnicode_DATA(pystr);
933 kind = PyUnicode_KIND(pystr);
934 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
935
936 /* read a sign if it's there, make sure it's not the end of the string */
937 if (PyUnicode_READ(kind, str, idx) == '-') {
938 idx++;
939 if (idx > end_idx) {
940 raise_stop_iteration(start);
941 return NULL;
942 }
943 }
944
945 /* read as many integer digits as we find as long as it doesn't start with 0 */
946 if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
947 idx++;
948 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
949 }
950 /* if it starts with 0 we only expect one integer digit */
951 else if (PyUnicode_READ(kind, str, idx) == '0') {
952 idx++;
953 }
954 /* no integer digits, error */
955 else {
956 raise_stop_iteration(start);
957 return NULL;
958 }
959
960 /* if the next char is '.' followed by a digit then read all float digits */
961 if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
962 is_float = 1;
963 idx += 2;
964 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
965 }
966
967 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
968 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
969 Py_ssize_t e_start = idx;
970 idx++;
971
972 /* read an exponent sign if present */
973 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
974
975 /* read all digits */
976 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
977
978 /* if we got a digit, then parse as float. if not, backtrack */
979 if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
980 is_float = 1;
981 }
982 else {
983 idx = e_start;
984 }
985 }
986
987 if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
988 custom_func = s->parse_float;
989 else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
990 custom_func = s->parse_int;
991 else
992 custom_func = NULL;
993
994 if (custom_func) {
995 /* copy the section we determined to be a number */
996 numstr = PyUnicode_FromKindAndData(kind,
997 (char*)str + kind * start,
998 idx - start);
999 if (numstr == NULL)
1000 return NULL;
1001 rval = PyObject_CallOneArg(custom_func, numstr);
1002 }
1003 else {
1004 Py_ssize_t i, n;
1005 char *buf;
1006 /* Straight conversion to ASCII, to avoid costly conversion of
1007 decimal unicode digits (which cannot appear here) */
1008 n = idx - start;
1009 numstr = PyBytes_FromStringAndSize(NULL, n);
1010 if (numstr == NULL)
1011 return NULL;
1012 buf = PyBytes_AS_STRING(numstr);
1013 for (i = 0; i < n; i++) {
1014 buf[i] = (char) PyUnicode_READ(kind, str, i + start);
1015 }
1016 if (is_float)
1017 rval = PyFloat_FromString(numstr);
1018 else
1019 rval = PyLong_FromString(buf, NULL, 10);
1020 }
1021 Py_DECREF(numstr);
1022 *next_idx_ptr = idx;
1023 return rval;
1024 }
1025
1026 static PyObject *
scan_once_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1027 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1028 {
1029 /* Read one JSON term (of any kind) from PyUnicode pystr.
1030 idx is the index of the first character of the term
1031 *next_idx_ptr is a return-by-reference index to the first character after
1032 the number.
1033
1034 Returns a new PyObject representation of the term.
1035 */
1036 PyObject *res;
1037 const void *str;
1038 int kind;
1039 Py_ssize_t length;
1040
1041 if (PyUnicode_READY(pystr) == -1)
1042 return NULL;
1043
1044 str = PyUnicode_DATA(pystr);
1045 kind = PyUnicode_KIND(pystr);
1046 length = PyUnicode_GET_LENGTH(pystr);
1047
1048 if (idx < 0) {
1049 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
1050 return NULL;
1051 }
1052 if (idx >= length) {
1053 raise_stop_iteration(idx);
1054 return NULL;
1055 }
1056
1057 switch (PyUnicode_READ(kind, str, idx)) {
1058 case '"':
1059 /* string */
1060 return scanstring_unicode(pystr, idx + 1, s->strict, next_idx_ptr);
1061 case '{':
1062 /* object */
1063 if (_Py_EnterRecursiveCall(" while decoding a JSON object "
1064 "from a unicode string"))
1065 return NULL;
1066 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1067 _Py_LeaveRecursiveCall();
1068 return res;
1069 case '[':
1070 /* array */
1071 if (_Py_EnterRecursiveCall(" while decoding a JSON array "
1072 "from a unicode string"))
1073 return NULL;
1074 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1075 _Py_LeaveRecursiveCall();
1076 return res;
1077 case 'n':
1078 /* null */
1079 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
1080 *next_idx_ptr = idx + 4;
1081 Py_RETURN_NONE;
1082 }
1083 break;
1084 case 't':
1085 /* true */
1086 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
1087 *next_idx_ptr = idx + 4;
1088 Py_RETURN_TRUE;
1089 }
1090 break;
1091 case 'f':
1092 /* false */
1093 if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1094 PyUnicode_READ(kind, str, idx + 2) == 'l' &&
1095 PyUnicode_READ(kind, str, idx + 3) == 's' &&
1096 PyUnicode_READ(kind, str, idx + 4) == 'e') {
1097 *next_idx_ptr = idx + 5;
1098 Py_RETURN_FALSE;
1099 }
1100 break;
1101 case 'N':
1102 /* NaN */
1103 if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1104 PyUnicode_READ(kind, str, idx + 2) == 'N') {
1105 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1106 }
1107 break;
1108 case 'I':
1109 /* Infinity */
1110 if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
1111 PyUnicode_READ(kind, str, idx + 2) == 'f' &&
1112 PyUnicode_READ(kind, str, idx + 3) == 'i' &&
1113 PyUnicode_READ(kind, str, idx + 4) == 'n' &&
1114 PyUnicode_READ(kind, str, idx + 5) == 'i' &&
1115 PyUnicode_READ(kind, str, idx + 6) == 't' &&
1116 PyUnicode_READ(kind, str, idx + 7) == 'y') {
1117 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1118 }
1119 break;
1120 case '-':
1121 /* -Infinity */
1122 if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
1123 PyUnicode_READ(kind, str, idx + 2) == 'n' &&
1124 PyUnicode_READ(kind, str, idx + 3) == 'f' &&
1125 PyUnicode_READ(kind, str, idx + 4) == 'i' &&
1126 PyUnicode_READ(kind, str, idx + 5) == 'n' &&
1127 PyUnicode_READ(kind, str, idx + 6) == 'i' &&
1128 PyUnicode_READ(kind, str, idx + 7) == 't' &&
1129 PyUnicode_READ(kind, str, idx + 8) == 'y') {
1130 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1131 }
1132 break;
1133 }
1134 /* Didn't find a string, object, array, or named constant. Look for a number. */
1135 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1136 }
1137
1138 static PyObject *
scanner_call(PyScannerObject * self,PyObject * args,PyObject * kwds)1139 scanner_call(PyScannerObject *self, PyObject *args, PyObject *kwds)
1140 {
1141 /* Python callable interface to scan_once_{str,unicode} */
1142 PyObject *pystr;
1143 PyObject *rval;
1144 Py_ssize_t idx;
1145 Py_ssize_t next_idx = -1;
1146 static char *kwlist[] = {"string", "idx", NULL};
1147 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
1148 return NULL;
1149
1150 if (PyUnicode_Check(pystr)) {
1151 rval = scan_once_unicode(self, pystr, idx, &next_idx);
1152 }
1153 else {
1154 PyErr_Format(PyExc_TypeError,
1155 "first argument must be a string, not %.80s",
1156 Py_TYPE(pystr)->tp_name);
1157 return NULL;
1158 }
1159 PyDict_Clear(self->memo);
1160 if (rval == NULL)
1161 return NULL;
1162 return _build_rval_index_tuple(rval, next_idx);
1163 }
1164
1165 static PyObject *
scanner_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1166 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1167 {
1168 PyScannerObject *s;
1169 PyObject *ctx;
1170 PyObject *strict;
1171 static char *kwlist[] = {"context", NULL};
1172
1173 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1174 return NULL;
1175
1176 s = (PyScannerObject *)type->tp_alloc(type, 0);
1177 if (s == NULL) {
1178 return NULL;
1179 }
1180
1181 s->memo = PyDict_New();
1182 if (s->memo == NULL)
1183 goto bail;
1184
1185 /* All of these will fail "gracefully" so we don't need to verify them */
1186 strict = PyObject_GetAttrString(ctx, "strict");
1187 if (strict == NULL)
1188 goto bail;
1189 s->strict = PyObject_IsTrue(strict);
1190 Py_DECREF(strict);
1191 if (s->strict < 0)
1192 goto bail;
1193 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1194 if (s->object_hook == NULL)
1195 goto bail;
1196 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1197 if (s->object_pairs_hook == NULL)
1198 goto bail;
1199 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1200 if (s->parse_float == NULL)
1201 goto bail;
1202 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1203 if (s->parse_int == NULL)
1204 goto bail;
1205 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1206 if (s->parse_constant == NULL)
1207 goto bail;
1208
1209 return (PyObject *)s;
1210
1211 bail:
1212 Py_DECREF(s);
1213 return NULL;
1214 }
1215
1216 PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1217
1218 static PyType_Slot PyScannerType_slots[] = {
1219 {Py_tp_doc, (void *)scanner_doc},
1220 {Py_tp_dealloc, scanner_dealloc},
1221 {Py_tp_call, scanner_call},
1222 {Py_tp_traverse, scanner_traverse},
1223 {Py_tp_clear, scanner_clear},
1224 {Py_tp_members, scanner_members},
1225 {Py_tp_new, scanner_new},
1226 {0, 0}
1227 };
1228
1229 static PyType_Spec PyScannerType_spec = {
1230 .name = "_json.Scanner",
1231 .basicsize = sizeof(PyScannerObject),
1232 .itemsize = 0,
1233 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
1234 .slots = PyScannerType_slots,
1235 };
1236
1237 static PyObject *
encoder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1238 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1239 {
1240 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1241
1242 PyEncoderObject *s;
1243 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1244 PyObject *item_separator;
1245 int sort_keys, skipkeys, allow_nan;
1246
1247 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUppp:make_encoder", kwlist,
1248 &markers, &defaultfn, &encoder, &indent,
1249 &key_separator, &item_separator,
1250 &sort_keys, &skipkeys, &allow_nan))
1251 return NULL;
1252
1253 if (markers != Py_None && !PyDict_Check(markers)) {
1254 PyErr_Format(PyExc_TypeError,
1255 "make_encoder() argument 1 must be dict or None, "
1256 "not %.200s", Py_TYPE(markers)->tp_name);
1257 return NULL;
1258 }
1259
1260 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1261 if (s == NULL)
1262 return NULL;
1263
1264 s->markers = markers;
1265 s->defaultfn = defaultfn;
1266 s->encoder = encoder;
1267 s->indent = indent;
1268 s->key_separator = key_separator;
1269 s->item_separator = item_separator;
1270 s->sort_keys = sort_keys;
1271 s->skipkeys = skipkeys;
1272 s->allow_nan = allow_nan;
1273 s->fast_encode = NULL;
1274 if (PyCFunction_Check(s->encoder)) {
1275 PyCFunction f = PyCFunction_GetFunction(s->encoder);
1276 if (f == (PyCFunction)py_encode_basestring_ascii ||
1277 f == (PyCFunction)py_encode_basestring) {
1278 s->fast_encode = f;
1279 }
1280 }
1281
1282 Py_INCREF(s->markers);
1283 Py_INCREF(s->defaultfn);
1284 Py_INCREF(s->encoder);
1285 Py_INCREF(s->indent);
1286 Py_INCREF(s->key_separator);
1287 Py_INCREF(s->item_separator);
1288 return (PyObject *)s;
1289 }
1290
1291 static PyObject *
encoder_call(PyEncoderObject * self,PyObject * args,PyObject * kwds)1292 encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
1293 {
1294 /* Python callable interface to encode_listencode_obj */
1295 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1296 PyObject *obj;
1297 Py_ssize_t indent_level;
1298 _PyAccu acc;
1299 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
1300 &obj, &indent_level))
1301 return NULL;
1302 if (_PyAccu_Init(&acc))
1303 return NULL;
1304 if (encoder_listencode_obj(self, &acc, obj, indent_level)) {
1305 _PyAccu_Destroy(&acc);
1306 return NULL;
1307 }
1308 return _PyAccu_FinishAsList(&acc);
1309 }
1310
1311 static PyObject *
_encoded_const(PyObject * obj)1312 _encoded_const(PyObject *obj)
1313 {
1314 /* Return the JSON string representation of None, True, False */
1315 if (obj == Py_None) {
1316 _Py_static_string(PyId_null, "null");
1317 PyObject *s_null = _PyUnicode_FromId(&PyId_null);
1318 if (s_null == NULL) {
1319 return NULL;
1320 }
1321 return Py_NewRef(s_null);
1322 }
1323 else if (obj == Py_True) {
1324 _Py_static_string(PyId_true, "true");
1325 PyObject *s_true = _PyUnicode_FromId(&PyId_true);
1326 if (s_true == NULL) {
1327 return NULL;
1328 }
1329 return Py_NewRef(s_true);
1330 }
1331 else if (obj == Py_False) {
1332 _Py_static_string(PyId_false, "false");
1333 PyObject *s_false = _PyUnicode_FromId(&PyId_false);
1334 if (s_false == NULL) {
1335 return NULL;
1336 }
1337 return Py_NewRef(s_false);
1338 }
1339 else {
1340 PyErr_SetString(PyExc_ValueError, "not a const");
1341 return NULL;
1342 }
1343 }
1344
1345 static PyObject *
encoder_encode_float(PyEncoderObject * s,PyObject * obj)1346 encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1347 {
1348 /* Return the JSON representation of a PyFloat. */
1349 double i = PyFloat_AS_DOUBLE(obj);
1350 if (!Py_IS_FINITE(i)) {
1351 if (!s->allow_nan) {
1352 PyErr_SetString(
1353 PyExc_ValueError,
1354 "Out of range float values are not JSON compliant"
1355 );
1356 return NULL;
1357 }
1358 if (i > 0) {
1359 return PyUnicode_FromString("Infinity");
1360 }
1361 else if (i < 0) {
1362 return PyUnicode_FromString("-Infinity");
1363 }
1364 else {
1365 return PyUnicode_FromString("NaN");
1366 }
1367 }
1368 return PyFloat_Type.tp_repr(obj);
1369 }
1370
1371 static PyObject *
encoder_encode_string(PyEncoderObject * s,PyObject * obj)1372 encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1373 {
1374 /* Return the JSON representation of a string */
1375 PyObject *encoded;
1376
1377 if (s->fast_encode) {
1378 return s->fast_encode(NULL, obj);
1379 }
1380 encoded = PyObject_CallOneArg(s->encoder, obj);
1381 if (encoded != NULL && !PyUnicode_Check(encoded)) {
1382 PyErr_Format(PyExc_TypeError,
1383 "encoder() must return a string, not %.80s",
1384 Py_TYPE(encoded)->tp_name);
1385 Py_DECREF(encoded);
1386 return NULL;
1387 }
1388 return encoded;
1389 }
1390
1391 static int
_steal_accumulate(_PyAccu * acc,PyObject * stolen)1392 _steal_accumulate(_PyAccu *acc, PyObject *stolen)
1393 {
1394 /* Append stolen and then decrement its reference count */
1395 int rval = _PyAccu_Accumulate(acc, stolen);
1396 Py_DECREF(stolen);
1397 return rval;
1398 }
1399
1400 static int
encoder_listencode_obj(PyEncoderObject * s,_PyAccu * acc,PyObject * obj,Py_ssize_t indent_level)1401 encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc,
1402 PyObject *obj, Py_ssize_t indent_level)
1403 {
1404 /* Encode Python object obj to a JSON term */
1405 PyObject *newobj;
1406 int rv;
1407
1408 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1409 PyObject *cstr = _encoded_const(obj);
1410 if (cstr == NULL)
1411 return -1;
1412 return _steal_accumulate(acc, cstr);
1413 }
1414 else if (PyUnicode_Check(obj))
1415 {
1416 PyObject *encoded = encoder_encode_string(s, obj);
1417 if (encoded == NULL)
1418 return -1;
1419 return _steal_accumulate(acc, encoded);
1420 }
1421 else if (PyLong_Check(obj)) {
1422 PyObject *encoded = PyLong_Type.tp_repr(obj);
1423 if (encoded == NULL)
1424 return -1;
1425 return _steal_accumulate(acc, encoded);
1426 }
1427 else if (PyFloat_Check(obj)) {
1428 PyObject *encoded = encoder_encode_float(s, obj);
1429 if (encoded == NULL)
1430 return -1;
1431 return _steal_accumulate(acc, encoded);
1432 }
1433 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
1434 if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
1435 return -1;
1436 rv = encoder_listencode_list(s, acc, obj, indent_level);
1437 _Py_LeaveRecursiveCall();
1438 return rv;
1439 }
1440 else if (PyDict_Check(obj)) {
1441 if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
1442 return -1;
1443 rv = encoder_listencode_dict(s, acc, obj, indent_level);
1444 _Py_LeaveRecursiveCall();
1445 return rv;
1446 }
1447 else {
1448 PyObject *ident = NULL;
1449 if (s->markers != Py_None) {
1450 int has_key;
1451 ident = PyLong_FromVoidPtr(obj);
1452 if (ident == NULL)
1453 return -1;
1454 has_key = PyDict_Contains(s->markers, ident);
1455 if (has_key) {
1456 if (has_key != -1)
1457 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1458 Py_DECREF(ident);
1459 return -1;
1460 }
1461 if (PyDict_SetItem(s->markers, ident, obj)) {
1462 Py_DECREF(ident);
1463 return -1;
1464 }
1465 }
1466 newobj = PyObject_CallOneArg(s->defaultfn, obj);
1467 if (newobj == NULL) {
1468 Py_XDECREF(ident);
1469 return -1;
1470 }
1471
1472 if (_Py_EnterRecursiveCall(" while encoding a JSON object")) {
1473 Py_DECREF(newobj);
1474 Py_XDECREF(ident);
1475 return -1;
1476 }
1477 rv = encoder_listencode_obj(s, acc, newobj, indent_level);
1478 _Py_LeaveRecursiveCall();
1479
1480 Py_DECREF(newobj);
1481 if (rv) {
1482 Py_XDECREF(ident);
1483 return -1;
1484 }
1485 if (ident != NULL) {
1486 if (PyDict_DelItem(s->markers, ident)) {
1487 Py_XDECREF(ident);
1488 return -1;
1489 }
1490 Py_XDECREF(ident);
1491 }
1492 return rv;
1493 }
1494 }
1495
1496 static int
encoder_listencode_dict(PyEncoderObject * s,_PyAccu * acc,PyObject * dct,Py_ssize_t indent_level)1497 encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
1498 PyObject *dct, Py_ssize_t indent_level)
1499 {
1500 /* Encode Python dict dct a JSON term */
1501 _Py_static_string(PyId_open_dict, "{");
1502 _Py_static_string(PyId_close_dict, "}");
1503 _Py_static_string(PyId_empty_dict, "{}");
1504 PyObject *open_dict = _PyUnicode_FromId(&PyId_open_dict); // borrowed ref
1505 PyObject *close_dict = _PyUnicode_FromId(&PyId_close_dict); // borrowed ref
1506 PyObject *empty_dict = _PyUnicode_FromId(&PyId_empty_dict); // borrowed ref
1507 PyObject *kstr = NULL;
1508 PyObject *ident = NULL;
1509 PyObject *it = NULL;
1510 PyObject *items;
1511 PyObject *item = NULL;
1512 Py_ssize_t idx;
1513
1514 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
1515 return -1;
1516 }
1517 if (PyDict_GET_SIZE(dct) == 0) /* Fast path */
1518 return _PyAccu_Accumulate(acc, empty_dict);
1519
1520 if (s->markers != Py_None) {
1521 int has_key;
1522 ident = PyLong_FromVoidPtr(dct);
1523 if (ident == NULL)
1524 goto bail;
1525 has_key = PyDict_Contains(s->markers, ident);
1526 if (has_key) {
1527 if (has_key != -1)
1528 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1529 goto bail;
1530 }
1531 if (PyDict_SetItem(s->markers, ident, dct)) {
1532 goto bail;
1533 }
1534 }
1535
1536 if (_PyAccu_Accumulate(acc, open_dict))
1537 goto bail;
1538
1539 if (s->indent != Py_None) {
1540 /* TODO: DOES NOT RUN */
1541 indent_level += 1;
1542 /*
1543 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1544 separator = _item_separator + newline_indent
1545 buf += newline_indent
1546 */
1547 }
1548
1549 items = PyMapping_Items(dct);
1550 if (items == NULL)
1551 goto bail;
1552 if (s->sort_keys && PyList_Sort(items) < 0) {
1553 Py_DECREF(items);
1554 goto bail;
1555 }
1556 it = PyObject_GetIter(items);
1557 Py_DECREF(items);
1558 if (it == NULL)
1559 goto bail;
1560 idx = 0;
1561 while ((item = PyIter_Next(it)) != NULL) {
1562 PyObject *encoded, *key, *value;
1563 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
1564 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1565 goto bail;
1566 }
1567 key = PyTuple_GET_ITEM(item, 0);
1568 if (PyUnicode_Check(key)) {
1569 Py_INCREF(key);
1570 kstr = key;
1571 }
1572 else if (PyFloat_Check(key)) {
1573 kstr = encoder_encode_float(s, key);
1574 if (kstr == NULL)
1575 goto bail;
1576 }
1577 else if (key == Py_True || key == Py_False || key == Py_None) {
1578 /* This must come before the PyLong_Check because
1579 True and False are also 1 and 0.*/
1580 kstr = _encoded_const(key);
1581 if (kstr == NULL)
1582 goto bail;
1583 }
1584 else if (PyLong_Check(key)) {
1585 kstr = PyLong_Type.tp_repr(key);
1586 if (kstr == NULL) {
1587 goto bail;
1588 }
1589 }
1590 else if (s->skipkeys) {
1591 Py_DECREF(item);
1592 continue;
1593 }
1594 else {
1595 PyErr_Format(PyExc_TypeError,
1596 "keys must be str, int, float, bool or None, "
1597 "not %.100s", Py_TYPE(key)->tp_name);
1598 goto bail;
1599 }
1600
1601 if (idx) {
1602 if (_PyAccu_Accumulate(acc, s->item_separator))
1603 goto bail;
1604 }
1605
1606 encoded = encoder_encode_string(s, kstr);
1607 Py_CLEAR(kstr);
1608 if (encoded == NULL)
1609 goto bail;
1610 if (_PyAccu_Accumulate(acc, encoded)) {
1611 Py_DECREF(encoded);
1612 goto bail;
1613 }
1614 Py_DECREF(encoded);
1615 if (_PyAccu_Accumulate(acc, s->key_separator))
1616 goto bail;
1617
1618 value = PyTuple_GET_ITEM(item, 1);
1619 if (encoder_listencode_obj(s, acc, value, indent_level))
1620 goto bail;
1621 idx += 1;
1622 Py_DECREF(item);
1623 }
1624 if (PyErr_Occurred())
1625 goto bail;
1626 Py_CLEAR(it);
1627
1628 if (ident != NULL) {
1629 if (PyDict_DelItem(s->markers, ident))
1630 goto bail;
1631 Py_CLEAR(ident);
1632 }
1633 /* TODO DOES NOT RUN; dead code
1634 if (s->indent != Py_None) {
1635 indent_level -= 1;
1636
1637 yield '\n' + (' ' * (_indent * _current_indent_level))
1638 }*/
1639 if (_PyAccu_Accumulate(acc, close_dict))
1640 goto bail;
1641 return 0;
1642
1643 bail:
1644 Py_XDECREF(it);
1645 Py_XDECREF(item);
1646 Py_XDECREF(kstr);
1647 Py_XDECREF(ident);
1648 return -1;
1649 }
1650
1651
1652 static int
encoder_listencode_list(PyEncoderObject * s,_PyAccu * acc,PyObject * seq,Py_ssize_t indent_level)1653 encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc,
1654 PyObject *seq, Py_ssize_t indent_level)
1655 {
1656 /* Encode Python list seq to a JSON term */
1657 _Py_static_string(PyId_open_array, "[");
1658 _Py_static_string(PyId_close_array, "]");
1659 _Py_static_string(PyId_empty_array, "[]");
1660 PyObject *open_array = _PyUnicode_FromId(&PyId_open_array); // borrowed ref
1661 PyObject *close_array = _PyUnicode_FromId(&PyId_close_array); // borrowed ref
1662 PyObject *empty_array = _PyUnicode_FromId(&PyId_empty_array); // borrowed ref
1663 PyObject *ident = NULL;
1664 PyObject *s_fast = NULL;
1665 Py_ssize_t i;
1666
1667 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1668 return -1;
1669 }
1670 ident = NULL;
1671 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1672 if (s_fast == NULL)
1673 return -1;
1674 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
1675 Py_DECREF(s_fast);
1676 return _PyAccu_Accumulate(acc, empty_array);
1677 }
1678
1679 if (s->markers != Py_None) {
1680 int has_key;
1681 ident = PyLong_FromVoidPtr(seq);
1682 if (ident == NULL)
1683 goto bail;
1684 has_key = PyDict_Contains(s->markers, ident);
1685 if (has_key) {
1686 if (has_key != -1)
1687 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1688 goto bail;
1689 }
1690 if (PyDict_SetItem(s->markers, ident, seq)) {
1691 goto bail;
1692 }
1693 }
1694
1695 if (_PyAccu_Accumulate(acc, open_array))
1696 goto bail;
1697 if (s->indent != Py_None) {
1698 /* TODO: DOES NOT RUN */
1699 indent_level += 1;
1700 /*
1701 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1702 separator = _item_separator + newline_indent
1703 buf += newline_indent
1704 */
1705 }
1706 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1707 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
1708 if (i) {
1709 if (_PyAccu_Accumulate(acc, s->item_separator))
1710 goto bail;
1711 }
1712 if (encoder_listencode_obj(s, acc, obj, indent_level))
1713 goto bail;
1714 }
1715 if (ident != NULL) {
1716 if (PyDict_DelItem(s->markers, ident))
1717 goto bail;
1718 Py_CLEAR(ident);
1719 }
1720
1721 /* TODO: DOES NOT RUN
1722 if (s->indent != Py_None) {
1723 indent_level -= 1;
1724
1725 yield '\n' + (' ' * (_indent * _current_indent_level))
1726 }*/
1727 if (_PyAccu_Accumulate(acc, close_array))
1728 goto bail;
1729 Py_DECREF(s_fast);
1730 return 0;
1731
1732 bail:
1733 Py_XDECREF(ident);
1734 Py_DECREF(s_fast);
1735 return -1;
1736 }
1737
1738 static void
encoder_dealloc(PyObject * self)1739 encoder_dealloc(PyObject *self)
1740 {
1741 PyTypeObject *tp = Py_TYPE(self);
1742 /* bpo-31095: UnTrack is needed before calling any callbacks */
1743 PyObject_GC_UnTrack(self);
1744 encoder_clear((PyEncoderObject *)self);
1745 tp->tp_free(self);
1746 Py_DECREF(tp);
1747 }
1748
1749 static int
encoder_traverse(PyEncoderObject * self,visitproc visit,void * arg)1750 encoder_traverse(PyEncoderObject *self, visitproc visit, void *arg)
1751 {
1752 Py_VISIT(Py_TYPE(self));
1753 Py_VISIT(self->markers);
1754 Py_VISIT(self->defaultfn);
1755 Py_VISIT(self->encoder);
1756 Py_VISIT(self->indent);
1757 Py_VISIT(self->key_separator);
1758 Py_VISIT(self->item_separator);
1759 return 0;
1760 }
1761
1762 static int
encoder_clear(PyEncoderObject * self)1763 encoder_clear(PyEncoderObject *self)
1764 {
1765 /* Deallocate Encoder */
1766 Py_CLEAR(self->markers);
1767 Py_CLEAR(self->defaultfn);
1768 Py_CLEAR(self->encoder);
1769 Py_CLEAR(self->indent);
1770 Py_CLEAR(self->key_separator);
1771 Py_CLEAR(self->item_separator);
1772 return 0;
1773 }
1774
1775 PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
1776
1777 static PyType_Slot PyEncoderType_slots[] = {
1778 {Py_tp_doc, (void *)encoder_doc},
1779 {Py_tp_dealloc, encoder_dealloc},
1780 {Py_tp_call, encoder_call},
1781 {Py_tp_traverse, encoder_traverse},
1782 {Py_tp_clear, encoder_clear},
1783 {Py_tp_members, encoder_members},
1784 {Py_tp_new, encoder_new},
1785 {0, 0}
1786 };
1787
1788 static PyType_Spec PyEncoderType_spec = {
1789 .name = "_json.Encoder",
1790 .basicsize = sizeof(PyEncoderObject),
1791 .itemsize = 0,
1792 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
1793 .slots = PyEncoderType_slots
1794 };
1795
1796 static PyMethodDef speedups_methods[] = {
1797 {"encode_basestring_ascii",
1798 (PyCFunction)py_encode_basestring_ascii,
1799 METH_O,
1800 pydoc_encode_basestring_ascii},
1801 {"encode_basestring",
1802 (PyCFunction)py_encode_basestring,
1803 METH_O,
1804 pydoc_encode_basestring},
1805 {"scanstring",
1806 (PyCFunction)py_scanstring,
1807 METH_VARARGS,
1808 pydoc_scanstring},
1809 {NULL, NULL, 0, NULL}
1810 };
1811
1812 PyDoc_STRVAR(module_doc,
1813 "json speedups\n");
1814
1815 static int
_json_exec(PyObject * module)1816 _json_exec(PyObject *module)
1817 {
1818 _jsonmodulestate *state = get_json_state(module);
1819
1820 state->PyScannerType = PyType_FromSpec(&PyScannerType_spec);
1821 if (state->PyScannerType == NULL) {
1822 return -1;
1823 }
1824 Py_INCREF(state->PyScannerType);
1825 if (PyModule_AddObject(module, "make_scanner", state->PyScannerType) < 0) {
1826 Py_DECREF(state->PyScannerType);
1827 return -1;
1828 }
1829
1830 state->PyEncoderType = PyType_FromSpec(&PyEncoderType_spec);
1831 if (state->PyEncoderType == NULL) {
1832 return -1;
1833 }
1834 Py_INCREF(state->PyEncoderType);
1835 if (PyModule_AddObject(module, "make_encoder", state->PyEncoderType) < 0) {
1836 Py_DECREF(state->PyEncoderType);
1837 return -1;
1838 }
1839
1840 return 0;
1841 }
1842
1843 static int
_jsonmodule_traverse(PyObject * module,visitproc visit,void * arg)1844 _jsonmodule_traverse(PyObject *module, visitproc visit, void *arg)
1845 {
1846 _jsonmodulestate *state = get_json_state(module);
1847 Py_VISIT(state->PyScannerType);
1848 Py_VISIT(state->PyEncoderType);
1849 return 0;
1850 }
1851
1852 static int
_jsonmodule_clear(PyObject * module)1853 _jsonmodule_clear(PyObject *module)
1854 {
1855 _jsonmodulestate *state = get_json_state(module);
1856 Py_CLEAR(state->PyScannerType);
1857 Py_CLEAR(state->PyEncoderType);
1858 return 0;
1859 }
1860
1861 static void
_jsonmodule_free(void * module)1862 _jsonmodule_free(void *module)
1863 {
1864 _jsonmodule_clear((PyObject *)module);
1865 }
1866
1867 static PyModuleDef_Slot _json_slots[] = {
1868 {Py_mod_exec, _json_exec},
1869 {0, NULL}
1870 };
1871
1872 static struct PyModuleDef jsonmodule = {
1873 PyModuleDef_HEAD_INIT,
1874 "_json",
1875 module_doc,
1876 sizeof(_jsonmodulestate),
1877 speedups_methods,
1878 _json_slots,
1879 _jsonmodule_traverse,
1880 _jsonmodule_clear,
1881 _jsonmodule_free,
1882 };
1883
1884 PyMODINIT_FUNC
PyInit__json(void)1885 PyInit__json(void)
1886 {
1887 return PyModuleDef_Init(&jsonmodule);
1888 }
1889