xref: /aosp_15_r20/prebuilts/build-tools/common/py3-stdlib/json/encoder.py (revision cda5da8d549138a6648c5ee6d7a49cf8f4a657be)
1"""Implementation of JSONEncoder
2"""
3import re
4
5try:
6    from _json import encode_basestring_ascii as c_encode_basestring_ascii
7except ImportError:
8    c_encode_basestring_ascii = None
9try:
10    from _json import encode_basestring as c_encode_basestring
11except ImportError:
12    c_encode_basestring = None
13try:
14    from _json import make_encoder as c_make_encoder
15except ImportError:
16    c_make_encoder = None
17
# Characters that need escaping when non-ASCII output is permitted: the
# control range U+0000-U+001F, the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Characters that need escaping for ASCII-only output: backslash, double
# quote, and everything outside the printable range from space to tilde.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Matches any single byte with the high bit set.
HAS_UTF8 = re.compile(b'[\x80-\xff]')

# Maps a character to its escaped spelling.  The two-character short forms
# come first; the remaining control characters get a \uXXXX escape, added in
# code-point order without overriding a short form that is already present.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
ESCAPE_DCT.update(
    (chr(code), '\\u{0:04x}'.format(code))
    for code in range(0x20)
    if chr(code) not in ESCAPE_DCT
)

# Positive infinity; the encoder compares floats against it (and its
# negation) to detect values with no JSON-compliant spelling.
INFINITY = float('inf')
36
def py_encode_basestring(s):
    """Return *s* as a double-quoted JSON string literal.

    Each character matched by ``ESCAPE`` (control characters, backslash and
    double quote) is replaced by its entry in ``ESCAPE_DCT``.  Every other
    character, non-ASCII included, is copied through unchanged.
    """
    escaped = ESCAPE.sub(lambda found: ESCAPE_DCT[found.group(0)], s)
    return f'"{escaped}"'
44
45
# Public entry point: the C accelerator when ``_json`` supplied one,
# otherwise the pure-Python implementation.
encode_basestring = (
    c_encode_basestring if c_encode_basestring else py_encode_basestring)
47
48
def py_encode_basestring_ascii(s):
    """Return *s* as a double-quoted JSON string literal using ASCII only.

    Characters matched by ``ESCAPE_ASCII`` are rewritten: those listed in
    ``ESCAPE_DCT`` use that table's spelling, other code points below
    U+10000 become a single ``\\uXXXX`` escape, and code points from U+10000
    upwards become a pair of ``\\uXXXX`` escapes (a UTF-16 surrogate pair).
    """
    def _escape(found):
        char = found.group(0)
        known = ESCAPE_DCT.get(char)
        if known is not None:
            return known
        codepoint = ord(char)
        if codepoint < 0x10000:
            return '\\u{0:04x}'.format(codepoint)
        # Beyond the Basic Multilingual Plane: split the 20-bit offset into
        # a high (0xD800-based) and a low (0xDC00-based) surrogate.
        offset = codepoint - 0x10000
        high = 0xd800 | ((offset >> 10) & 0x3ff)
        low = 0xdc00 | (offset & 0x3ff)
        return '\\u{0:04x}\\u{1:04x}'.format(high, low)

    return '"' + ESCAPE_ASCII.sub(_escape, s) + '"'
69
70
# Public entry point: the C accelerator when ``_json`` supplied one,
# otherwise the pure-Python implementation.
encode_basestring_ascii = (
    c_encode_basestring_ascii
    if c_encode_basestring_ascii
    else py_encode_basestring_ascii
)
73
class JSONEncoder(object):
    """Extensible JSON <https://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str               | string        |
    +-------------------+---------------+
    | int, float        | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To support further types, subclass and override ``.default()`` so that
    it returns a serializable object for ``o`` when it can, and otherwise
    calls the superclass implementation (which raises ``TypeError``).

    """
    # Class-level defaults; ``__init__`` shadows them per instance when the
    # caller passes ``separators`` or ``indent``.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, *, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        skipkeys: when false, a dict key that is not str, int, float, bool
        or None makes encoding raise TypeError; when true, such items are
        skipped.

        ensure_ascii: when true, strings are encoded with the ASCII-only
        escaper, so non-ASCII characters are written as escapes; when
        false, they are written as-is.

        check_circular: when true, lists, dicts and custom-encoded objects
        are tracked during encoding and a circular reference raises
        ValueError instead of recursing without bound (which would cause a
        RecursionError).  When false, no tracking is done.

        allow_nan: when true, NaN, Infinity and -Infinity are written using
        those spellings, which is not JSON-specification compliant.  When
        false, encoding such a float raises ValueError.

        sort_keys: when true, dictionary items are written in sorted order.

        indent: None selects the compact single-line form.  Otherwise
        array elements and object members are each written on their own
        line, indented by that many spaces per level (or by the given
        string repeated per level); 0 inserts newlines only.

        separators: an (item_separator, key_separator) tuple.  When
        omitted, (', ', ': ') is used if *indent* is ``None`` and
        (',', ': ') otherwise.  Pass (',', ':') for the most compact form.

        default: a function called for objects that cannot otherwise be
        serialized.  It should return a JSON-encodable version of the
        object or raise ``TypeError``.

        """
        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        if separators is None:
            # Indented output ends each line right after the comma, so the
            # trailing space of the default item separator is dropped.
            if indent is not None:
                self.item_separator = ','
        else:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Instance attribute shadows the ``default`` method below.
            self.default = default

    def default(self, o):
        """Hook for objects the encoder does not handle natively.

        Override in a subclass to return a serializable object for ``o``,
        or call this base implementation to raise ``TypeError``.

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                # Let the base class default method raise the TypeError
                return JSONEncoder.default(self, o)

        """
        type_name = o.__class__.__name__
        raise TypeError(f'Object of type {type_name} '
                        f'is not JSON serializable')

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> from json.encoder import JSONEncoder
        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # Fast path for a bare string: no generator machinery required.
        if isinstance(o, str):
            escape = (encode_basestring_ascii if self.ensure_ascii
                      else encode_basestring)
            return escape(o)
        # The chunks are materialized before joining rather than handing the
        # iterator straight to ''.join(), because the exceptions are less
        # detailed that way.
        pieces = self.iterencode(o, _one_shot=True)
        if not isinstance(pieces, (list, tuple)):
            pieces = list(pieces)
        return ''.join(pieces)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        """
        markers = {} if self.check_circular else None
        _encoder = (encode_basestring_ascii if self.ensure_ascii
                    else encode_basestring)

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=float.__repr__, _inf=INFINITY, _neginf=-INFINITY):
            # Specials are detected with plain comparisons (NaN is the only
            # value unequal to itself) so the check does not depend on the
            # platform's float internals.
            if o != o:
                special = 'NaN'
            elif o == _inf:
                special = 'Infinity'
            elif o == _neginf:
                special = '-Infinity'
            else:
                return _repr(o)

            if allow_nan:
                return special
            raise ValueError(
                "Out of range float values are not JSON compliant: " +
                repr(o))

        # The C encoder is used only for one-shot, non-indented encoding.
        use_c_encoder = (_one_shot and c_make_encoder is not None
                         and self.indent is None)
        if use_c_encoder:
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)
259
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        ## HACK: hand-optimized bytecode; turn globals into locals
        ValueError=ValueError,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        str=str,
        tuple=tuple,
        _intstr=int.__repr__,
    ):
    """Build the pure-Python incremental encoder and return it.

    Arguments:

    markers -- dict used to detect circular references (keyed by ``id()``
        of each container or custom object currently being encoded), or
        None to disable the check.
    _default -- callable invoked for objects of an unsupported type; its
        return value is encoded in place of the object.
    _encoder -- callable turning a str into a JSON string literal.
    _indent -- None for single-line output, otherwise an int (number of
        spaces per nesting level) or a str repeated per nesting level.
    _floatstr -- callable turning a float into its JSON text.
    _key_separator, _item_separator -- strings written between a key and
        its value, and between consecutive items.
    _sort_keys -- if true, dict items are written in sorted order.
    _skipkeys -- if true, dict items whose key is not str, int, float, bool
        or None are silently dropped; otherwise such a key raises TypeError.
    _one_shot -- accepted for signature compatibility; not used here.

    The remaining keyword parameters only bind builtins as locals for speed
    and are not meant to be supplied by callers.

    Returns a generator function ``_iterencode(o, _current_indent_level)``
    that yields the JSON text of ``o`` in chunks.  It raises ValueError on
    a circular reference (when ``markers`` is not None).
    """

    if _indent is not None and not isinstance(_indent, str):
        _indent = ' ' * _indent

    def _iterencode_list(lst, _current_indent_level):
        # Yield the JSON array text for a list or tuple.
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        # ``buf`` holds the text that must precede the next element: the
        # opening bracket (plus indent) for the first one, the separator
        # for every later one.
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + _indent * _current_indent_level
            separator = _item_separator + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = separator
            if isinstance(value, str):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, int):
                # Subclasses of int/float may override __repr__, but we still
                # want to encode them as integers/floats in JSON. One example
                # within the standard library is IntEnum.
                yield buf + _intstr(value)
            elif isinstance(value, float):
                # see comment above for int
                yield buf + _floatstr(value)
            else:
                yield buf
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                yield from chunks
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + _indent * _current_indent_level
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        # Yield the JSON object text for a dict.
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + _indent * _current_indent_level
            item_separator = _item_separator + newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        # Stays True until a member has actually been written; with
        # _skipkeys every key may be dropped, leaving the object empty.
        first = True
        if _sort_keys:
            items = sorted(dct.items())
        else:
            items = dct.items()
        for key, value in items:
            if isinstance(key, str):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                # float.__repr__-style text, ignoring subclass overrides
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, int):
                # int.__repr__ text, ignoring subclass overrides
                key = _intstr(key)
            elif _skipkeys:
                continue
            else:
                raise TypeError(f'keys must be str, int, float, bool or None, '
                                f'not {key.__class__.__name__}')
            if first:
                first = False
                # The opening newline/indent is deferred until a member is
                # known to be written, so a dict whose keys are all skipped
                # encodes as '{}' rather than '{' + blank line + '}'.
                if newline_indent is not None:
                    yield newline_indent
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, str):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, int):
                # int.__repr__ text, ignoring subclass overrides
                yield _intstr(value)
            elif isinstance(value, float):
                # float text via _floatstr, ignoring subclass overrides
                yield _floatstr(value)
            else:
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                yield from chunks
        # Only close on a new line if at least one member was written.
        if not first and newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + _indent * _current_indent_level
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        # Dispatch on the type of ``o``; unsupported types go through
        # ``_default`` and the result is encoded recursively.
        if isinstance(o, str):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, int):
            # int.__repr__ text, ignoring subclass overrides
            yield _intstr(o)
        elif isinstance(o, float):
            # float text via _floatstr, ignoring subclass overrides
            yield _floatstr(o)
        elif isinstance(o, (list, tuple)):
            yield from _iterencode_list(o, _current_indent_level)
        elif isinstance(o, dict):
            yield from _iterencode_dict(o, _current_indent_level)
        else:
            if markers is not None:
                # Guards against a ``_default`` that keeps returning an
                # object which leads back to ``o``.
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            o = _default(o)
            yield from _iterencode(o, _current_indent_level)
            if markers is not None:
                del markers[markerid]
    return _iterencode
444