xref: /aosp_15_r20/external/starlark-go/starlarkjson/json.go (revision 4947cdc739c985f6d86941e22894f5cefe7c9e9a)
1// Copyright 2020 The Bazel Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package starlarkjson defines utilities for converting Starlark values
6// to/from JSON strings. The most recent IETF standard for JSON is
7// RFC 8259: https://www.ietf.org/rfc/rfc8259.txt.
8package starlarkjson // import "go.starlark.net/starlarkjson"
9
10import (
11	"bytes"
12	"encoding/json"
13	"fmt"
14	"log"
15	"math"
16	"math/big"
17	"sort"
18	"strconv"
19	"strings"
20	"unicode/utf8"
21
22	"go.starlark.net/starlark"
23	"go.starlark.net/starlarkstruct"
24)
25
26// Module json is a Starlark module of JSON-related functions.
27//
28//   json = module(
29//      encode,
30//      decode,
31//      indent,
32//   )
33//
34// def encode(x):
35//
36// The encode function accepts one required positional argument,
37// which it converts to JSON by cases:
38// - A Starlark value that implements Go's standard json.Marshaler
39//   interface defines its own JSON encoding.
40// - None, True, and False are converted to null, true, and false, respectively.
41// - Starlark int values, no matter how large, are encoded as decimal integers.
42//   Some decoders may not be able to decode very large integers.
43// - Starlark float values are encoded using decimal point notation,
44//   even if the value is an integer.
45//   It is an error to encode a non-finite floating-point value.
46// - Starlark strings are encoded as JSON strings, using UTF-16 escapes.
47// - A Starlark IterableMapping (e.g. dict) is encoded as a JSON object.
48//   It is an error if any key is not a string.
49// - Any other Starlark Iterable (e.g. list, tuple) is encoded as a JSON array.
50// - A Starlark HasAttrs (e.g. struct) is encoded as a JSON object.
51// If an application-defined type matches more than one of the cases described above
52// (e.g. it implements both Iterable and HasAttrs), the first case takes precedence.
53// Encoding any other value yields an error.
54//
55// def decode(x):
56//
57// The decode function accepts one positional parameter, a JSON string.
58// It returns the Starlark value that the string denotes.
59// - Numbers are parsed as int or float, depending on whether they
60//   contain a decimal point or an exponent.
61// - JSON objects are parsed as new unfrozen Starlark dicts.
62// - JSON arrays are parsed as new unfrozen Starlark lists.
63// Decoding fails if x is not a valid JSON string.
64//
65// def indent(str, *, prefix="", indent="\t"):
66//
67// The indent function pretty-prints a valid JSON encoding,
68// and returns a string containing the indented form.
69// It accepts one required positional parameter, the JSON string,
70// and two optional keyword-only string parameters, prefix and indent,
71// that specify a prefix of each new line, and the unit of indentation.
72//
73var Module = &starlarkstruct.Module{
74	Name: "json",
75	Members: starlark.StringDict{
76		"encode": starlark.NewBuiltin("json.encode", encode),
77		"decode": starlark.NewBuiltin("json.decode", decode),
78		"indent": starlark.NewBuiltin("json.indent", indent),
79	},
80}
81
82func encode(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
83	var x starlark.Value
84	if err := starlark.UnpackPositionalArgs(b.Name(), args, kwargs, 1, &x); err != nil {
85		return nil, err
86	}
87
88	buf := new(bytes.Buffer)
89
90	var quoteSpace [128]byte
91	quote := func(s string) {
92		// Non-trivial escaping is handled by Go's encoding/json.
93		if isPrintableASCII(s) {
94			buf.Write(strconv.AppendQuote(quoteSpace[:0], s))
95		} else {
96			// TODO(adonovan): opt: RFC 8259 mandates UTF-8 for JSON.
97			// Can we avoid this call?
98			data, _ := json.Marshal(s)
99			buf.Write(data)
100		}
101	}
102
103	var emit func(x starlark.Value) error
104	emit = func(x starlark.Value) error {
105		switch x := x.(type) {
106		case json.Marshaler:
107			// Application-defined starlark.Value types
108			// may define their own JSON encoding.
109			data, err := x.MarshalJSON()
110			if err != nil {
111				return err
112			}
113			buf.Write(data)
114
115		case starlark.NoneType:
116			buf.WriteString("null")
117
118		case starlark.Bool:
119			if x {
120				buf.WriteString("true")
121			} else {
122				buf.WriteString("false")
123			}
124
125		case starlark.Int:
126			fmt.Fprint(buf, x)
127
128		case starlark.Float:
129			if !isFinite(float64(x)) {
130				return fmt.Errorf("cannot encode non-finite float %v", x)
131			}
132			fmt.Fprintf(buf, "%g", x) // always contains a decimal point
133
134		case starlark.String:
135			quote(string(x))
136
137		case starlark.IterableMapping:
138			// e.g. dict (must have string keys)
139			buf.WriteByte('{')
140			items := x.Items()
141			for _, item := range items {
142				if _, ok := item[0].(starlark.String); !ok {
143					return fmt.Errorf("%s has %s key, want string", x.Type(), item[0].Type())
144				}
145			}
146			sort.Slice(items, func(i, j int) bool {
147				return items[i][0].(starlark.String) < items[j][0].(starlark.String)
148			})
149			for i, item := range items {
150				if i > 0 {
151					buf.WriteByte(',')
152				}
153				k, _ := starlark.AsString(item[0])
154				quote(k)
155				buf.WriteByte(':')
156				if err := emit(item[1]); err != nil {
157					return fmt.Errorf("in %s key %s: %v", x.Type(), item[0], err)
158				}
159			}
160			buf.WriteByte('}')
161
162		case starlark.Iterable:
163			// e.g. tuple, list
164			buf.WriteByte('[')
165			iter := x.Iterate()
166			defer iter.Done()
167			var elem starlark.Value
168			for i := 0; iter.Next(&elem); i++ {
169				if i > 0 {
170					buf.WriteByte(',')
171				}
172				if err := emit(elem); err != nil {
173					return fmt.Errorf("at %s index %d: %v", x.Type(), i, err)
174				}
175			}
176			buf.WriteByte(']')
177
178		case starlark.HasAttrs:
179			// e.g. struct
180			buf.WriteByte('{')
181			var names []string
182			names = append(names, x.AttrNames()...)
183			sort.Strings(names)
184			for i, name := range names {
185				v, err := x.Attr(name)
186				if err != nil || v == nil {
187					log.Fatalf("internal error: dir(%s) includes %q but value has no .%s field", x.Type(), name, name)
188				}
189				if i > 0 {
190					buf.WriteByte(',')
191				}
192				quote(name)
193				buf.WriteByte(':')
194				if err := emit(v); err != nil {
195					return fmt.Errorf("in field .%s: %v", name, err)
196				}
197			}
198			buf.WriteByte('}')
199
200		default:
201			return fmt.Errorf("cannot encode %s as JSON", x.Type())
202		}
203		return nil
204	}
205
206	if err := emit(x); err != nil {
207		return nil, fmt.Errorf("%s: %v", b.Name(), err)
208	}
209	return starlark.String(buf.String()), nil
210}
211
// isPrintableASCII reports whether s contains only printable ASCII,
// that is, bytes in the range 0x20 (space) through 0x7e ('~').
// The empty string is trivially printable.
//
// DEL (0x7f) is excluded along with the other control characters:
// encode uses this predicate to decide whether strconv.AppendQuote is
// safe, and strconv quotes DEL as the Go escape \x7f, which is not a
// valid JSON escape sequence.
func isPrintableASCII(s string) bool {
	for i := 0; i < len(s); i++ {
		if b := s[i]; b < 0x20 || b >= 0x7f {
			return false
		}
	}
	return true
}
222
// isFinite reports whether f is an ordinary finite number,
// i.e. neither a NaN nor positive or negative infinity.
func isFinite(f float64) bool {
	if math.IsNaN(f) {
		return false
	}
	return !math.IsInf(f, 0)
}
228
229func indent(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
230	prefix, indent := "", "\t" // keyword-only
231	if err := starlark.UnpackArgs(b.Name(), nil, kwargs,
232		"prefix?", &prefix,
233		"indent?", &indent,
234	); err != nil {
235		return nil, err
236	}
237	var str string // positional-only
238	if err := starlark.UnpackPositionalArgs(b.Name(), args, nil, 1, &str); err != nil {
239		return nil, err
240	}
241
242	buf := new(bytes.Buffer)
243	if err := json.Indent(buf, []byte(str), prefix, indent); err != nil {
244		return nil, fmt.Errorf("%s: %v", b.Name(), err)
245	}
246	return starlark.String(buf.String()), nil
247}
248
249func decode(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (_ starlark.Value, err error) {
250	var s string
251	if err := starlark.UnpackPositionalArgs(b.Name(), args, kwargs, 1, &s); err != nil {
252		return nil, err
253	}
254
255	// The decoder necessarily makes certain representation choices
256	// such as list vs tuple, struct vs dict, int vs float.
257	// In principle, we could parameterize it to allow the caller to
258	// control the returned types, but there's no compelling need yet.
259
260	// Use panic/recover with a distinguished type (failure) for error handling.
261	type failure string
262	fail := func(format string, args ...interface{}) {
263		panic(failure(fmt.Sprintf(format, args...)))
264	}
265
266	i := 0
267
268	// skipSpace consumes leading spaces, and reports whether there is more input.
269	skipSpace := func() bool {
270		for ; i < len(s); i++ {
271			b := s[i]
272			if b != ' ' && b != '\t' && b != '\n' && b != '\r' {
273				return true
274			}
275		}
276		return false
277	}
278
279	// next consumes leading spaces and returns the first non-space.
280	// It panics if at EOF.
281	next := func() byte {
282		if skipSpace() {
283			return s[i]
284		}
285		fail("unexpected end of file")
286		panic("unreachable")
287	}
288
289	// parse returns the next JSON value from the input.
290	// It consumes leading but not trailing whitespace.
291	// It panics on error.
292	var parse func() starlark.Value
293	parse = func() starlark.Value {
294		b := next()
295		switch b {
296		case '"':
297			// string
298
299			// Find end of quotation.
300			// Also, record whether trivial unquoting is safe.
301			// Non-trivial unquoting is handled by Go's encoding/json.
302			safe := true
303			closed := false
304			j := i + 1
305			for ; j < len(s); j++ {
306				b := s[j]
307				if b == '\\' {
308					safe = false
309					j++ // skip x in \x
310				} else if b == '"' {
311					closed = true
312					j++ // skip '"'
313					break
314				} else if b >= utf8.RuneSelf {
315					safe = false
316				}
317			}
318			if !closed {
319				fail("unclosed string literal")
320			}
321
322			r := s[i:j]
323			i = j
324
325			// unquote
326			if safe {
327				r = r[1 : len(r)-1]
328			} else if err := json.Unmarshal([]byte(r), &r); err != nil {
329				fail("%s", err)
330			}
331			return starlark.String(r)
332
333		case 'n':
334			if strings.HasPrefix(s[i:], "null") {
335				i += len("null")
336				return starlark.None
337			}
338
339		case 't':
340			if strings.HasPrefix(s[i:], "true") {
341				i += len("true")
342				return starlark.True
343			}
344
345		case 'f':
346			if strings.HasPrefix(s[i:], "false") {
347				i += len("false")
348				return starlark.False
349			}
350
351		case '[':
352			// array
353			var elems []starlark.Value
354
355			i++ // '['
356			b = next()
357			if b != ']' {
358				for {
359					elem := parse()
360					elems = append(elems, elem)
361					b = next()
362					if b != ',' {
363						if b != ']' {
364							fail("got %q, want ',' or ']'", b)
365						}
366						break
367					}
368					i++ // ','
369				}
370			}
371			i++ // ']'
372			return starlark.NewList(elems)
373
374		case '{':
375			// object
376			dict := new(starlark.Dict)
377
378			i++ // '{'
379			b = next()
380			if b != '}' {
381				for {
382					key := parse()
383					if _, ok := key.(starlark.String); !ok {
384						fail("got %s for object key, want string", key.Type())
385					}
386					b = next()
387					if b != ':' {
388						fail("after object key, got %q, want ':' ", b)
389					}
390					i++ // ':'
391					value := parse()
392					dict.SetKey(key, value) // can't fail
393					b = next()
394					if b != ',' {
395						if b != '}' {
396							fail("in object, got %q, want ',' or '}'", b)
397						}
398						break
399					}
400					i++ // ','
401				}
402			}
403			i++ // '}'
404			return dict
405
406		default:
407			// number?
408			if isdigit(b) || b == '-' {
409				// scan literal. Allow [0-9+-eE.] for now.
410				float := false
411				var j int
412				for j = i + 1; j < len(s); j++ {
413					b = s[j]
414					if isdigit(b) {
415						// ok
416					} else if b == '.' ||
417						b == 'e' ||
418						b == 'E' ||
419						b == '+' ||
420						b == '-' {
421						float = true
422					} else {
423						break
424					}
425				}
426				num := s[i:j]
427				i = j
428
429				// Unlike most C-like languages,
430				// JSON disallows a leading zero before a digit.
431				digits := num
432				if num[0] == '-' {
433					digits = num[1:]
434				}
435				if digits == "" || digits[0] == '0' && len(digits) > 1 && isdigit(digits[1]) {
436					fail("invalid number: %s", num)
437				}
438
439				// parse literal
440				if float {
441					x, err := strconv.ParseFloat(num, 64)
442					if err != nil {
443						fail("invalid number: %s", num)
444					}
445					return starlark.Float(x)
446				} else {
447					x, ok := new(big.Int).SetString(num, 10)
448					if !ok {
449						fail("invalid number: %s", num)
450					}
451					return starlark.MakeBigInt(x)
452				}
453			}
454		}
455		fail("unexpected character %q", b)
456		panic("unreachable")
457	}
458	defer func() {
459		x := recover()
460		switch x := x.(type) {
461		case failure:
462			err = fmt.Errorf("json.decode: at offset %d, %s", i, x)
463		case nil:
464			// nop
465		default:
466			panic(x) // unexpected panic
467		}
468	}()
469	x := parse()
470	if skipSpace() {
471		fail("unexpected character %q after value", s[i])
472	}
473	return x, nil
474}
475
// isdigit reports whether b is one of the ASCII decimal digits '0'..'9'.
func isdigit(b byte) bool {
	// Byte arithmetic wraps around, so any b below '0' maps to a
	// large value and fails the single comparison.
	return b-'0' <= 9
}
479