1 // Copyright 2022 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 mod visitor;
6
7 use crate::visitor::ValueVisitor;
8
9 use serde::de::Deserializer;
10 use serde_json_lenient::de::SliceRead;
11 use std::pin::Pin;
12
/// UTF-8 byte order mark (the UTF-8 encoding of U+FEFF). `decode_json` skips
/// these bytes when they appear at the very start of the input.
const UTF8_BOM: [u8; 3] = [0xef, 0xbb, 0xbf];
15
/// C++ bindings
///
/// Declares the C++ callbacks that the decoder invokes while walking a JSON
/// document, the Rust entry point exposed to C++, and the plain-data structs
/// shared between both languages.
#[cxx::bridge(namespace=serde_json_lenient)]
mod ffi {
    // From the `wrapper_functions` target.
    unsafe extern "C++" {
        include!("third_party/rust/serde_json_lenient/v0_2/wrapper/functions.h");

        // Opaque C++ type; Rust only ever handles it through
        // `Pin<&mut ContextPointer>`.
        type ContextPointer;

        // Opaque C++ type whose methods are the callbacks declared below.
        type Functions;

        // `list_append_*` callbacks. Presumably each appends a value of the
        // named kind to the list behind `ctx`; the implementations live in C++
        // and are not visible here. The `list`/`dict` variants hand back a
        // `ContextPointer` tied to the lifetime of the `ctx` passed in.
        fn list_append_none(self: &Functions, ctx: Pin<&mut ContextPointer>);
        fn list_append_bool(self: &Functions, ctx: Pin<&mut ContextPointer>, val: bool);
        fn list_append_i32(self: &Functions, ctx: Pin<&mut ContextPointer>, val: i32);
        fn list_append_f64(self: &Functions, ctx: Pin<&mut ContextPointer>, val: f64);
        fn list_append_str(self: &Functions, ctx: Pin<&mut ContextPointer>, val: &str);
        fn list_append_list<'a>(
            self: &Functions,
            ctx: Pin<&'a mut ContextPointer>,
            reserve: usize,
        ) -> Pin<&'a mut ContextPointer>;
        fn list_append_dict<'a>(
            self: &Functions,
            ctx: Pin<&'a mut ContextPointer>,
        ) -> Pin<&'a mut ContextPointer>;

        // `dict_set_*` callbacks. Presumably each stores a value of the named
        // kind under `key` in the dictionary behind `ctx` (implemented in C++;
        // confirm there). The returned `ContextPointer` of the `list`/`dict`
        // variants borrows from `ctx` ('a), not from `key` ('f).
        fn dict_set_none(self: &Functions, ctx: Pin<&mut ContextPointer>, key: &str);
        fn dict_set_bool(self: &Functions, ctx: Pin<&mut ContextPointer>, key: &str, val: bool);
        fn dict_set_i32(self: &Functions, ctx: Pin<&mut ContextPointer>, key: &str, val: i32);
        fn dict_set_f64(self: &Functions, ctx: Pin<&mut ContextPointer>, key: &str, val: f64);
        fn dict_set_str(self: &Functions, ctx: Pin<&mut ContextPointer>, key: &str, val: &str);
        fn dict_set_list<'f, 'a>(
            self: &Functions,
            ctx: Pin<&'a mut ContextPointer>,
            key: &'f str,
            reserve: usize,
        ) -> Pin<&'a mut ContextPointer>;
        fn dict_set_dict<'f, 'a>(
            self: &Functions,
            ctx: Pin<&'a mut ContextPointer>,
            key: &'f str,
        ) -> Pin<&'a mut ContextPointer>;
    }

    // The Rust entry point callable from C++; implemented by the free function
    // `decode_json` in this file.
    extern "Rust" {
        fn decode_json(
            json: &[u8],
            options: &JsonOptions,
            functions: &'static Functions,
            ctx: Pin<&mut ContextPointer>,
            error: Pin<&mut DecodeError>,
        ) -> bool;
    }

    /// Details of a decoding failure, filled in by `decode_json` when it
    /// returns `false`.
    struct DecodeError {
        /// Line reported by the deserializer for the error, or `-1` if the
        /// value does not fit in an `i32`.
        line: i32,
        /// Column reported by the deserializer for the error, or `-1` if the
        /// value does not fit in an `i32`.
        column: i32,
        /// The deserializer error rendered as text.
        message: String,
    }

    /// Options for parsing JSON inputs. A mirror of the C++
    /// `base::JSONParserOptions` bitflags, represented as a friendlier
    /// struct-of-bools instead, and with additional fields.
    struct JsonOptions {
        /// Allows commas to exist after the last element in structures.
        allow_trailing_commas: bool,
        /// If set the parser replaces invalid code points (i.e. lone
        /// surrogates) with the Unicode replacement character (U+FFFD).
        /// If not set, invalid code points trigger a hard error and
        /// parsing fails.
        replace_invalid_characters: bool,
        /// Allows both C (/* */) and C++ (//) style comments.
        allow_comments: bool,
        /// Permits unescaped \r and \n in strings. This is a subset of what
        /// allow_control_chars allows.
        allow_newlines: bool,
        /// Permits unescaped ASCII control characters (such as unescaped \b,
        /// \r, or \n) in the range [0x00,0x1F].
        allow_control_chars: bool,
        /// Permits \\v vertical tab escapes.
        allow_vert_tab: bool,
        /// Permits \\xNN escapes.
        ///
        /// NOTE(review): this comment used to say "as described above", but
        /// no such description exists in this file; it presumably refers to
        /// the C++ `base::JSONParserOptions` documentation. Confirm there.
        allow_x_escapes: bool,

        /// The maximum recursion depth to walk while parsing nested JSON
        /// objects. JSON beyond the specified depth will be ignored.
        max_depth: usize,
    }
}
104
/// Public alias for the bridge's shared `DecodeError` struct.
pub type DecodeError = ffi::DecodeError;
/// Public alias for the bridge's shared `JsonOptions` struct.
pub type JsonOptions = ffi::JsonOptions;
/// Public alias for the opaque C++ `Functions` type declared in the bridge.
pub type Functions = ffi::Functions;
/// Public alias for the opaque C++ `ContextPointer` type declared in the
/// bridge.
pub type ContextPointer = ffi::ContextPointer;
109
110 /// Decode a JSON input from `json` and call back out to functions defined in
111 /// `options` when visiting each node in order for the caller to construct an
112 /// output.
113 ///
114 /// The first item visited will be appened to the `ctx` as if the `ctx` were a
115 /// list. This means the `ContextPointer` in `ctx` must already be a list
116 /// aggregate type, unless the caller has extra logic to handle the first
117 /// element visited.
118 ///
119 /// The `error` is only written to when there is an error decoding and `false`
120 /// is returned.
121 ///
122 /// # Returns
123 ///
124 /// Whether the decode succeeded.
decode_json( json: &[u8], options: &JsonOptions, functions: &'static Functions, ctx: Pin<&mut ContextPointer>, mut error: Pin<&mut DecodeError>, ) -> bool125 pub fn decode_json(
126 json: &[u8],
127 options: &JsonOptions,
128 functions: &'static Functions,
129 // TODO(danakj): Use std::ptr::NonNull when the binding generator supports it.
130 ctx: Pin<&mut ContextPointer>,
131 // TODO(danakj): Return `Result<(), DecodeError>` once the binding generator supports it.
132 mut error: Pin<&mut DecodeError>,
133 ) -> bool {
134 let mut deserializer = serde_json_lenient::Deserializer::new(SliceRead::new(
135 if json.starts_with(&UTF8_BOM) { &json[3..] } else { json },
136 options.replace_invalid_characters,
137
138 // On the C++ side, allow_control_chars means "allow all control chars,
139 // including \r and \n", while in serde_json_lenient,
140 // allow_control_chars means "allow all controls chars, except \r and
141 // \n". To give the behavior that C++ client code is expecting, enable
142 // allow_newlines as well when allow_control_chars is supplied.
143 options.allow_newlines || options.allow_control_chars,
144 options.allow_control_chars,
145 options.allow_vert_tab,
146 options.allow_x_escapes,
147 ));
148 deserializer.set_ignore_trailing_commas(options.allow_trailing_commas);
149 deserializer.set_allow_comments(options.allow_comments);
150
151 // We track recursion depth ourselves to limit it to `max_depth` option.
152 deserializer.disable_recursion_limit();
153
154 // The first element visited will be treated as if being appended to a list, as
155 // is specified in the contract of `decode_json()`.
156 //
157 // SAFETY: We have only a single ContextPointer around at a time, so this
158 // reference will not alias. The lifetime of the ContextPointer exceeds this
159 // function's lifetime, so we are okay to tie it to the `target`'s lifetime
160 // which is shorter.
161 //
162 // Dereferencing the ContextPointer in C++ would be Undefined Behaviour since
163 // it's not a similar type to the actual type it's pointing to, but Rust
164 // allows us to make a reference to it regardless.
165 let target = visitor::DeserializationTarget::List { ctx };
166
167 let result =
168 deserializer.deserialize_any(ValueVisitor::new(&functions, target, options.max_depth));
169 match result.and(deserializer.end()) {
170 Ok(()) => true,
171 Err(err) => {
172 error.as_mut().line = err.line().try_into().unwrap_or(-1);
173 error.as_mut().column = err.column().try_into().unwrap_or(-1);
174 error.as_mut().message.clear();
175 // The following line pulls in a lot of binary bloat, due to all the formatter
176 // implementations required to stringify error messages. This error message is
177 // used in only a couple of places outside unit tests so we could
178 // consider trying to eliminate.
179 error.as_mut().message.push_str(&err.to_string());
180 false
181 }
182 }
183 }
184