1 // pest. The Elegant Parser
2 // Copyright (c) 2018 Dragoș Tiselice
3 //
4 // Licensed under the Apache License, Version 2.0
5 // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6 // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. All files in the project carrying such notice may not be copied,
8 // modified, or distributed except according to those terms.
9 
10 #[macro_use]
11 extern crate pest;
12 
13 use std::collections::HashMap;
14 
15 use pest::error::Error;
16 use pest::iterators::{Pair, Pairs};
17 use pest::{state, ParseResult, Parser, ParserState, Span};
18 
/// Grammar rules of the hand-written JSON parser in this file.
///
/// The lowercase names mirror the rule names used in the `parses_to!`
/// expectations, hence the `non_camel_case_types` allowance.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[allow(dead_code)]
#[allow(non_camel_case_types)]
enum Rule {
    /// Entry point; delegates straight to `value`.
    json,
    /// `{ ... }` containing zero or more comma-separated `pair`s.
    object,
    /// A `string` key, a `:`, and a `value`.
    pair,
    /// `[ ... ]` containing zero or more comma-separated `value`s.
    array,
    /// Wrapper around exactly one string, number, object, array, bool or null.
    value,
    /// A double-quoted string, quotes included.
    string,
    /// A backslash escape inside a string (matched silently, never emitted as a pair).
    escape,
    /// The `u` + hex-digits form of an escape (matched silently).
    unicode,
    /// A single hexadecimal digit (matched silently).
    hex,
    /// A numeric literal with optional sign, fraction and exponent.
    number,
    /// The integer part of a number (matched silently).
    int,
    /// The exponent part of a number (matched silently).
    exp,
    /// `true` or `false`.
    bool,
    /// The literal `null`.
    null,
}
37 
38 struct JsonParser;
39 
40 impl Parser<Rule> for JsonParser {
41     // false positive: pest uses `..` as a complete range (historically)
42     #[allow(clippy::almost_complete_range)]
parse(rule: Rule, input: &str) -> Result<Pairs<Rule>, Error<Rule>>43     fn parse(rule: Rule, input: &str) -> Result<Pairs<Rule>, Error<Rule>> {
44         fn json(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
45             value(state)
46         }
47 
48         fn object(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
49             state.rule(Rule::object, |s| {
50                 s.sequence(|s| {
51                     s.match_string("{")
52                         .and_then(skip)
53                         .and_then(pair)
54                         .and_then(skip)
55                         .and_then(|s| {
56                             s.repeat(|s| {
57                                 s.sequence(|s| {
58                                     s.match_string(",")
59                                         .and_then(skip)
60                                         .and_then(pair)
61                                         .and_then(skip)
62                                 })
63                             })
64                         })
65                         .and_then(|s| s.match_string("}"))
66                 })
67                 .or_else(|s| {
68                     s.sequence(|s| {
69                         s.match_string("{")
70                             .and_then(skip)
71                             .and_then(|s| s.match_string("}"))
72                     })
73                 })
74             })
75         }
76 
77         fn pair(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
78             state.rule(Rule::pair, |s| {
79                 s.sequence(|s| {
80                     string(s)
81                         .and_then(skip)
82                         .and_then(|s| s.match_string(":"))
83                         .and_then(skip)
84                         .and_then(value)
85                 })
86             })
87         }
88 
89         fn array(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
90             state.rule(Rule::array, |s| {
91                 s.sequence(|s| {
92                     s.match_string("[")
93                         .and_then(skip)
94                         .and_then(value)
95                         .and_then(skip)
96                         .and_then(|s| {
97                             s.repeat(|s| {
98                                 s.sequence(|s| {
99                                     s.match_string(",")
100                                         .and_then(skip)
101                                         .and_then(value)
102                                         .and_then(skip)
103                                 })
104                             })
105                         })
106                         .and_then(|s| s.match_string("]"))
107                 })
108                 .or_else(|s| {
109                     s.sequence(|s| {
110                         s.match_string("[")
111                             .and_then(skip)
112                             .and_then(|s| s.match_string("]"))
113                     })
114                 })
115             })
116         }
117 
118         fn value(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
119             state.rule(Rule::value, |s| {
120                 string(s)
121                     .or_else(number)
122                     .or_else(object)
123                     .or_else(array)
124                     .or_else(bool)
125                     .or_else(null)
126             })
127         }
128 
129         fn string(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
130             state.rule(Rule::string, |s| {
131                 s.match_string("\"")
132                     .and_then(|s| {
133                         s.repeat(|s| {
134                             escape(s).or_else(|s| {
135                                 s.sequence(|s| {
136                                     s.lookahead(false, |s| {
137                                         s.match_string("\"").or_else(|s| s.match_string("\\"))
138                                     })
139                                     .and_then(|s| s.skip(1))
140                                 })
141                             })
142                         })
143                     })
144                     .and_then(|pos| pos.match_string("\""))
145             })
146         }
147 
148         fn escape(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
149             state.sequence(|s| {
150                 s.match_string("\\").and_then(|s| {
151                     s.match_string("\"")
152                         .or_else(|s| s.match_string("\\"))
153                         .or_else(|s| s.match_string("/"))
154                         .or_else(|s| s.match_string("b"))
155                         .or_else(|s| s.match_string("f"))
156                         .or_else(|s| s.match_string("n"))
157                         .or_else(|s| s.match_string("r"))
158                         .or_else(|s| s.match_string("t"))
159                         .or_else(unicode)
160                 })
161             })
162         }
163 
164         fn unicode(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
165             state.sequence(|s| {
166                 s.match_string("u")
167                     .and_then(hex)
168                     .and_then(hex)
169                     .and_then(hex)
170             })
171         }
172 
173         fn hex(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
174             state
175                 .match_range('0'..'9')
176                 .or_else(|s| s.match_range('a'..'f'))
177                 .or_else(|s| s.match_range('A'..'F'))
178         }
179 
180         fn number(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
181             state.rule(Rule::number, |s| {
182                 s.sequence(|s| {
183                     s.optional(|s| s.match_string("-"))
184                         .and_then(int)
185                         .and_then(|s| {
186                             s.optional(|s| {
187                                 s.sequence(|s| {
188                                     s.match_string(".")
189                                         .and_then(|s| s.match_range('0'..'9'))
190                                         .and_then(|s| s.repeat(|s| s.match_range('0'..'9')))
191                                         .and_then(|s| s.optional(exp))
192                                         .or_else(exp)
193                                 })
194                             })
195                         })
196                 })
197             })
198         }
199 
200         fn int(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
201             state.match_string("0").or_else(|s| {
202                 s.sequence(|s| {
203                     s.match_range('1'..'9')
204                         .and_then(|s| s.repeat(|s| s.match_range('0'..'9')))
205                 })
206             })
207         }
208 
209         fn exp(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
210             state.sequence(|s| {
211                 s.match_string("E")
212                     .or_else(|s| s.match_string("e"))
213                     .and_then(|s| {
214                         s.optional(|s| s.match_string("+").or_else(|s| s.match_string("-")))
215                     })
216                     .and_then(int)
217             })
218         }
219 
220         fn bool(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
221             state.rule(Rule::bool, |s| {
222                 s.match_string("true").or_else(|s| s.match_string("false"))
223             })
224         }
225 
226         fn null(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
227             state.rule(Rule::null, |s| s.match_string("null"))
228         }
229 
230         fn skip(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
231             state.repeat(|s| {
232                 s.match_string(" ")
233                     .or_else(|s| s.match_string("\t"))
234                     .or_else(|s| s.match_string("\r"))
235                     .or_else(|s| s.match_string("\n"))
236             })
237         }
238 
239         state(input, |state| match rule {
240             Rule::json => json(state),
241             Rule::object => object(state),
242             Rule::pair => pair(state),
243             Rule::array => array(state),
244             Rule::value => value(state),
245             Rule::string => string(state),
246             Rule::escape => escape(state),
247             Rule::unicode => unicode(state),
248             Rule::hex => hex(state),
249             Rule::number => number(state),
250             Rule::int => int(state),
251             Rule::exp => exp(state),
252             Rule::bool => bool(state),
253             Rule::null => null(state),
254         })
255     }
256 }
257 
258 #[derive(Debug, PartialEq)]
259 enum Json<'i> {
260     Null,
261     Bool(bool),
262     Number(f64),
263     String(Span<'i>),
264     Array(Vec<Json<'i>>),
265     Object(HashMap<Span<'i>, Json<'i>>),
266 }
267 
268 fn consume(pair: Pair<Rule>) -> Json {
269     fn value(pair: Pair<Rule>) -> Json {
270         let pair = pair.into_inner().next().unwrap();
271 
272         match pair.as_rule() {
273             Rule::null => Json::Null,
274             Rule::bool => match pair.as_str() {
275                 "false" => Json::Bool(false),
276                 "true" => Json::Bool(true),
277                 _ => unreachable!(),
278             },
279             Rule::number => Json::Number(pair.as_str().parse().unwrap()),
280             Rule::string => Json::String(pair.as_span()),
281             Rule::array => Json::Array(pair.into_inner().map(value).collect()),
282             Rule::object => {
283                 let pairs = pair.into_inner().map(|pos| {
284                     let mut pair = pos.into_inner();
285 
286                     let key = pair.next().unwrap().as_span();
287                     let value = value(pair.next().unwrap());
288 
289                     (key, value)
290                 });
291 
292                 Json::Object(pairs.collect())
293             }
294             _ => unreachable!(),
295         }
296     }
297 
298     value(pair)
299 }
300 
/// The `null` literal yields a single `null` token spanning the whole input.
#[test]
fn null() {
    let text = "null";

    parses_to! {
        parser: JsonParser,
        input: text,
        rule: Rule::null,
        tokens: [null(0, 4)]
    };
}
312 
/// `false` yields a single `bool` token spanning the whole input.
#[test]
fn bool() {
    let text = "false";

    parses_to! {
        parser: JsonParser,
        input: text,
        rule: Rule::bool,
        tokens: [bool(0, 5)]
    };
}
324 
/// A lone `0` is a complete number.
#[test]
fn number_zero() {
    let text = "0";

    parses_to! {
        parser: JsonParser,
        input: text,
        rule: Rule::number,
        tokens: [number(0, 1)]
    };
}
336 
/// A number with a fractional part is consumed in full.
#[test]
fn float() {
    let text = "100.001";

    parses_to! {
        parser: JsonParser,
        input: text,
        rule: Rule::number,
        tokens: [number(0, 7)]
    };
}
348 
/// A fraction followed by a signed exponent is consumed in full.
#[test]
fn float_with_exp() {
    let text = "100.001E+100";

    parses_to! {
        parser: JsonParser,
        input: text,
        rule: Rule::number,
        tokens: [number(0, 12)]
    };
}
360 
/// A leading minus sign is part of the number token.
#[test]
fn number_minus_zero() {
    let text = "-0";

    parses_to! {
        parser: JsonParser,
        input: text,
        rule: Rule::number,
        tokens: [number(0, 2)]
    };
}
372 
/// A string containing a `\u` escape and an escaped quote is one `string`
/// token, quotes included.
#[test]
fn string_with_escapes() {
    let text = "\"asd\\u0000\\\"\"";

    parses_to! {
        parser: JsonParser,
        input: text,
        rule: Rule::string,
        tokens: [string(0, 13)]
    };
}
384 
/// Brackets separated only by whitespace form an empty array with no children.
#[test]
fn array_empty() {
    let text = "[ ]";

    parses_to! {
        parser: JsonParser,
        input: text,
        rule: Rule::array,
        tokens: [array(0, 3)]
    };
}
396 
/// A mixed array produces one `value` child per element, including a nested
/// array.
#[test]
fn array() {
    let text = "[0.0e1, false, null, \"a\", [0]]";

    parses_to! {
        parser: JsonParser,
        input: text,
        rule: Rule::array,
        tokens: [
            array(0, 30, [
                value(1, 6, [number(1, 6)]),
                value(8, 13, [bool(8, 13)]),
                value(15, 19, [null(15, 19)]),
                value(21, 24, [string(21, 24)]),
                value(26, 29, [
                    array(26, 29, [value(27, 28, [number(27, 28)])])
                ])
            ])
        ]
    };
}
418 
/// An object with two members produces one `pair` per member, each holding
/// its key `string` and its `value`.
#[test]
fn object() {
    let text = "{\"a\" : 3, \"b\" : [{}, 3]}";

    parses_to! {
        parser: JsonParser,
        input: text,
        rule: Rule::object,
        tokens: [
            object(0, 24, [
                pair(1, 8, [
                    string(1, 4),
                    value(7, 8, [number(7, 8)])
                ]),
                pair(10, 23, [
                    string(10, 13),
                    value(16, 23, [
                        array(16, 23, [
                            value(17, 19, [object(17, 19)]),
                            value(21, 22, [number(21, 22)])
                        ])
                    ])
                ])
            ])
        ]
    };
}
444 
445 #[test]
ast()446 fn ast() {
447     let input = "{\"a\": [null, true, 3.4]}";
448 
449     let ast = consume(
450         JsonParser::parse(Rule::json, input)
451             .unwrap()
452             .next()
453             .unwrap(),
454     );
455 
456     if let Json::Object(pairs) = ast {
457         let vals: Vec<&Json> = pairs.values().collect();
458 
459         assert_eq!(
460             **vals.first().unwrap(),
461             Json::Array(vec![Json::Null, Json::Bool(true), Json::Number(3.4)])
462         );
463     }
464 }
465