"""Implementation of JSONEncoder."""
import re

try:
    from _json import encode_basestring_ascii as c_encode_basestring_ascii
except ImportError:
    c_encode_basestring_ascii = None
try:
    from _json import encode_basestring as c_encode_basestring
except ImportError:
    c_encode_basestring = None
try:
    from _json import make_encoder as c_make_encoder
except ImportError:
    c_make_encoder = None

# Characters that must be escaped inside a JSON string literal.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Same, plus every character outside the printable ASCII range ' '..'~'.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
HAS_UTF8 = re.compile(b'[\x80-\xff]')
# Maps a character to its escaped form; short escapes are listed explicitly
# and the remaining control characters below 0x20 are filled in as \uXXXX.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
for i in range(0x20):
    # setdefault keeps the short escapes defined above.
    ESCAPE_DCT.setdefault(chr(i), f'\\u{i:04x}')
del i

INFINITY = float('inf')


def py_encode_basestring(s):
    """Return a JSON representation of a Python string.

    The result is ``s`` wrapped in double quotes, with every character
    matched by ``ESCAPE`` replaced by its entry in ``ESCAPE_DCT``.
    Non-ASCII characters are left as they are.
    """
    def replace(match):
        return ESCAPE_DCT[match.group(0)]
    return '"' + ESCAPE.sub(replace, s) + '"'


encode_basestring = (c_encode_basestring or py_encode_basestring)


def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string.

    Characters with an entry in ``ESCAPE_DCT`` use that entry.  Any other
    character matched by ``ESCAPE_ASCII`` is written as ``\\uXXXX``; code
    points of 0x10000 and above are written as a UTF-16 surrogate pair.
    """
    def replace(match):
        char = match.group(0)
        try:
            return ESCAPE_DCT[char]
        except KeyError:
            pass
        n = ord(char)
        if n < 0x10000:
            return f'\\u{n:04x}'
        # Outside the Basic Multilingual Plane: emit a surrogate pair.
        n -= 0x10000
        s1 = 0xd800 | ((n >> 10) & 0x3ff)
        s2 = 0xdc00 | (n & 0x3ff)
        return f'\\u{s1:04x}\\u{s2:04x}'
    return '"' + ESCAPE_ASCII.sub(replace, s) + '"'


encode_basestring_ascii = (
    c_encode_basestring_ascii or py_encode_basestring_ascii)


class JSONEncoder:
    """Extensible JSON <https://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str               | string        |
    +-------------------+---------------+
    | int, float        | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method that returns a serializable object for ``o`` if
    possible; otherwise it should call the superclass implementation
    (to raise ``TypeError``).

    """
    item_separator = ', '
    key_separator = ': '

    def __init__(self, *, skipkeys=False, ensure_ascii=True,
                 check_circular=True, allow_nan=True, sort_keys=False,
                 indent=None, separators=None, default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, float, bool or None.
        If skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming non-ASCII characters escaped.  If
        ensure_ascii is false, the output can contain non-ASCII characters.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause a RecursionError).
        Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        If indent is a string, that string is used for each indent level.
        None is the most compact representation.

        If specified, separators should be an (item_separator, key_separator)
        tuple.  The default is (', ', ': ') if *indent* is ``None`` and
        (',', ': ') otherwise.  To get the most compact JSON representation,
        you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        """
        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        elif indent is not None:
            # Each item goes on its own line, so no space after the comma.
            self.item_separator = ','
        if default is not None:
            # Shadows the ``default`` method on this instance only.
            self.default = default

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                # Let the base class default method raise the TypeError
                return JSONEncoder.default(self, o)

        """
        raise TypeError(f'Object of type {o.__class__.__name__} '
                        f'is not JSON serializable')

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> from json.encoder import JSONEncoder
        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, str):
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        return ''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        ``_one_shot`` is a private flag set by ``encode()``; when it is
        true, the C encoder is available, and ``indent`` is None, the
        C encoder is used instead of the pure-Python generators.

        """
        markers = {} if self.check_circular else None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring

        def floatstr(o, allow_nan=self.allow_nan,
                     _repr=float.__repr__, _inf=INFINITY,
                     _neginf=-INFINITY):
            """Return the JSON text for float ``o``.

            Raises ValueError for NaN and the infinities when
            ``allow_nan`` is false.
            """
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.
            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        if (_one_shot and c_make_encoder is not None
                and self.indent is None):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)


def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        ## HACK: hand-optimized bytecode; turn globals into locals
        ValueError=ValueError,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        str=str,
        tuple=tuple,
        _intstr=int.__repr__,
    ):
    """Build and return the pure-Python ``_iterencode`` generator function.

    The returned callable takes ``(o, _current_indent_level)`` and yields
    the JSON text for ``o`` as a sequence of string chunks.

    ``markers`` is either None (no circular-reference check) or a dict
    mapping ``id(obj)`` to each container or custom object currently being
    encoded.  ``_default`` converts otherwise unsupported objects,
    ``_encoder`` encodes strings and ``_floatstr`` encodes floats.
    ``_indent`` is None, a string, or an int (converted to that many
    spaces).  The trailing keyword parameters only bind builtins as
    locals and are not meant to be passed by callers.
    """
    if _indent is not None and not isinstance(_indent, str):
        _indent = ' ' * _indent

    def _iterencode_list(lst, _current_indent_level):
        """Yield the JSON array text for the list or tuple ``lst``."""
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        # ``buf`` holds the text that must precede the next element: the
        # opening bracket for the first one, the separator afterwards.
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + _indent * _current_indent_level
            separator = _item_separator + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = separator
            if isinstance(value, str):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, int):
                # Subclasses of int/float may override __repr__, but we still
                # want to encode them as integers/floats in JSON. One example
                # within the standard library is IntEnum.
                yield buf + _intstr(value)
            elif isinstance(value, float):
                # see comment above for int
                yield buf + _floatstr(value)
            else:
                yield buf
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                yield from chunks
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + _indent * _current_indent_level
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        """Yield the JSON object text for the dict ``dct``.

        Raises TypeError for a key that is not str, int, float, bool or
        None, unless ``_skipkeys`` is true, in which case the item is
        skipped.
        """
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + _indent * _current_indent_level
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        if _sort_keys:
            items = sorted(dct.items())
        else:
            items = dct.items()
        for key, value in items:
            if isinstance(key, str):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                # see comment for int/float in _make_iterencode
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, int):
                # see comment for int/float in _make_iterencode
                key = _intstr(key)
            elif _skipkeys:
                continue
            else:
                raise TypeError(f'keys must be str, int, float, bool or None, '
                                f'not {key.__class__.__name__}')
            # The separator is emitted only after the key is accepted, so a
            # skipped key never leaves a stray separator behind.
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, str):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, int):
                # see comment for int/float in _make_iterencode
                yield _intstr(value)
            elif isinstance(value, float):
                # see comment for int/float in _make_iterencode
                yield _floatstr(value)
            else:
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                yield from chunks
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + _indent * _current_indent_level
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        """Yield the JSON text for an arbitrary object ``o``.

        Objects of unsupported types are passed to ``_default`` and the
        result is encoded in their place.
        """
        if isinstance(o, str):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, int):
            # see comment for int/float in _make_iterencode
            yield _intstr(o)
        elif isinstance(o, float):
            # see comment for int/float in _make_iterencode
            yield _floatstr(o)
        elif isinstance(o, (list, tuple)):
            yield from _iterencode_list(o, _current_indent_level)
        elif isinstance(o, dict):
            yield from _iterencode_dict(o, _current_indent_level)
        else:
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            o = _default(o)
            yield from _iterencode(o, _current_indent_level)
            if markers is not None:
                del markers[markerid]
    return _iterencode