1 // pest. The Elegant Parser
2 // Copyright (c) 2018 Dragoș Tiselice
3 //
4 // Licensed under the Apache License, Version 2.0
5 // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6 // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. All files in the project carrying such notice may not be copied,
8 // modified, or distributed except according to those terms.
9
10 #[macro_use]
11 extern crate pest;
12
13 use std::collections::HashMap;
14
15 use pest::error::Error;
16 use pest::iterators::{Pair, Pairs};
17 use pest::{state, ParseResult, Parser, ParserState, Span};
18
/// Rules of the hand-written JSON grammar implemented by `JsonParser`.
///
/// The variants are deliberately lower-case so that they read like grammar
/// rule names; `non_camel_case_types` is silenced for that reason, and
/// `dead_code` because not every rule is constructed by every test.
#[allow(dead_code, non_camel_case_types)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
enum Rule {
    /// Entry rule.
    json,
    /// `{ ... }` with zero or more comma-separated pairs.
    object,
    /// A `string : value` member of an object.
    pair,
    /// `[ ... ]` with zero or more comma-separated values.
    array,
    /// Any JSON value.
    value,
    /// A double-quoted string.
    string,
    /// A backslash escape inside a string.
    escape,
    /// The `u`-prefixed hexadecimal form of an escape.
    unicode,
    /// A single hexadecimal digit.
    hex,
    /// A JSON number.
    number,
    /// The integer part of a number.
    int,
    /// The exponent part of a number.
    exp,
    /// `true` or `false`.
    bool,
    /// The literal `null`.
    null,
}
37
38 struct JsonParser;
39
40 impl Parser<Rule> for JsonParser {
41 // false positive: pest uses `..` as a complete range (historically)
42 #[allow(clippy::almost_complete_range)]
parse(rule: Rule, input: &str) -> Result<Pairs<Rule>, Error<Rule>>43 fn parse(rule: Rule, input: &str) -> Result<Pairs<Rule>, Error<Rule>> {
44 fn json(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
45 value(state)
46 }
47
48 fn object(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
49 state.rule(Rule::object, |s| {
50 s.sequence(|s| {
51 s.match_string("{")
52 .and_then(skip)
53 .and_then(pair)
54 .and_then(skip)
55 .and_then(|s| {
56 s.repeat(|s| {
57 s.sequence(|s| {
58 s.match_string(",")
59 .and_then(skip)
60 .and_then(pair)
61 .and_then(skip)
62 })
63 })
64 })
65 .and_then(|s| s.match_string("}"))
66 })
67 .or_else(|s| {
68 s.sequence(|s| {
69 s.match_string("{")
70 .and_then(skip)
71 .and_then(|s| s.match_string("}"))
72 })
73 })
74 })
75 }
76
77 fn pair(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
78 state.rule(Rule::pair, |s| {
79 s.sequence(|s| {
80 string(s)
81 .and_then(skip)
82 .and_then(|s| s.match_string(":"))
83 .and_then(skip)
84 .and_then(value)
85 })
86 })
87 }
88
89 fn array(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
90 state.rule(Rule::array, |s| {
91 s.sequence(|s| {
92 s.match_string("[")
93 .and_then(skip)
94 .and_then(value)
95 .and_then(skip)
96 .and_then(|s| {
97 s.repeat(|s| {
98 s.sequence(|s| {
99 s.match_string(",")
100 .and_then(skip)
101 .and_then(value)
102 .and_then(skip)
103 })
104 })
105 })
106 .and_then(|s| s.match_string("]"))
107 })
108 .or_else(|s| {
109 s.sequence(|s| {
110 s.match_string("[")
111 .and_then(skip)
112 .and_then(|s| s.match_string("]"))
113 })
114 })
115 })
116 }
117
118 fn value(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
119 state.rule(Rule::value, |s| {
120 string(s)
121 .or_else(number)
122 .or_else(object)
123 .or_else(array)
124 .or_else(bool)
125 .or_else(null)
126 })
127 }
128
129 fn string(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
130 state.rule(Rule::string, |s| {
131 s.match_string("\"")
132 .and_then(|s| {
133 s.repeat(|s| {
134 escape(s).or_else(|s| {
135 s.sequence(|s| {
136 s.lookahead(false, |s| {
137 s.match_string("\"").or_else(|s| s.match_string("\\"))
138 })
139 .and_then(|s| s.skip(1))
140 })
141 })
142 })
143 })
144 .and_then(|pos| pos.match_string("\""))
145 })
146 }
147
148 fn escape(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
149 state.sequence(|s| {
150 s.match_string("\\").and_then(|s| {
151 s.match_string("\"")
152 .or_else(|s| s.match_string("\\"))
153 .or_else(|s| s.match_string("/"))
154 .or_else(|s| s.match_string("b"))
155 .or_else(|s| s.match_string("f"))
156 .or_else(|s| s.match_string("n"))
157 .or_else(|s| s.match_string("r"))
158 .or_else(|s| s.match_string("t"))
159 .or_else(unicode)
160 })
161 })
162 }
163
164 fn unicode(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
165 state.sequence(|s| {
166 s.match_string("u")
167 .and_then(hex)
168 .and_then(hex)
169 .and_then(hex)
170 })
171 }
172
173 fn hex(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
174 state
175 .match_range('0'..'9')
176 .or_else(|s| s.match_range('a'..'f'))
177 .or_else(|s| s.match_range('A'..'F'))
178 }
179
180 fn number(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
181 state.rule(Rule::number, |s| {
182 s.sequence(|s| {
183 s.optional(|s| s.match_string("-"))
184 .and_then(int)
185 .and_then(|s| {
186 s.optional(|s| {
187 s.sequence(|s| {
188 s.match_string(".")
189 .and_then(|s| s.match_range('0'..'9'))
190 .and_then(|s| s.repeat(|s| s.match_range('0'..'9')))
191 .and_then(|s| s.optional(exp))
192 .or_else(exp)
193 })
194 })
195 })
196 })
197 })
198 }
199
200 fn int(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
201 state.match_string("0").or_else(|s| {
202 s.sequence(|s| {
203 s.match_range('1'..'9')
204 .and_then(|s| s.repeat(|s| s.match_range('0'..'9')))
205 })
206 })
207 }
208
209 fn exp(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
210 state.sequence(|s| {
211 s.match_string("E")
212 .or_else(|s| s.match_string("e"))
213 .and_then(|s| {
214 s.optional(|s| s.match_string("+").or_else(|s| s.match_string("-")))
215 })
216 .and_then(int)
217 })
218 }
219
220 fn bool(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
221 state.rule(Rule::bool, |s| {
222 s.match_string("true").or_else(|s| s.match_string("false"))
223 })
224 }
225
226 fn null(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
227 state.rule(Rule::null, |s| s.match_string("null"))
228 }
229
230 fn skip(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
231 state.repeat(|s| {
232 s.match_string(" ")
233 .or_else(|s| s.match_string("\t"))
234 .or_else(|s| s.match_string("\r"))
235 .or_else(|s| s.match_string("\n"))
236 })
237 }
238
239 state(input, |state| match rule {
240 Rule::json => json(state),
241 Rule::object => object(state),
242 Rule::pair => pair(state),
243 Rule::array => array(state),
244 Rule::value => value(state),
245 Rule::string => string(state),
246 Rule::escape => escape(state),
247 Rule::unicode => unicode(state),
248 Rule::hex => hex(state),
249 Rule::number => number(state),
250 Rule::int => int(state),
251 Rule::exp => exp(state),
252 Rule::bool => bool(state),
253 Rule::null => null(state),
254 })
255 }
256 }
257
258 #[derive(Debug, PartialEq)]
259 enum Json<'i> {
260 Null,
261 Bool(bool),
262 Number(f64),
263 String(Span<'i>),
264 Array(Vec<Json<'i>>),
265 Object(HashMap<Span<'i>, Json<'i>>),
266 }
267
268 fn consume(pair: Pair<Rule>) -> Json {
269 fn value(pair: Pair<Rule>) -> Json {
270 let pair = pair.into_inner().next().unwrap();
271
272 match pair.as_rule() {
273 Rule::null => Json::Null,
274 Rule::bool => match pair.as_str() {
275 "false" => Json::Bool(false),
276 "true" => Json::Bool(true),
277 _ => unreachable!(),
278 },
279 Rule::number => Json::Number(pair.as_str().parse().unwrap()),
280 Rule::string => Json::String(pair.as_span()),
281 Rule::array => Json::Array(pair.into_inner().map(value).collect()),
282 Rule::object => {
283 let pairs = pair.into_inner().map(|pos| {
284 let mut pair = pos.into_inner();
285
286 let key = pair.next().unwrap().as_span();
287 let value = value(pair.next().unwrap());
288
289 (key, value)
290 });
291
292 Json::Object(pairs.collect())
293 }
294 _ => unreachable!(),
295 }
296 }
297
298 value(pair)
299 }
300
/// `null` parsed with `Rule::null` yields a single `null` token over 0..4.
#[test]
fn null() {
    parses_to! {
        parser: JsonParser,
        input: "null",
        rule: Rule::null,
        tokens: [null(0, 4)]
    };
}
312
/// `false` parsed with `Rule::bool` yields a single `bool` token over 0..5.
#[test]
fn bool() {
    parses_to! {
        parser: JsonParser,
        input: "false",
        rule: Rule::bool,
        tokens: [bool(0, 5)]
    };
}
324
/// A lone `0` is a complete number spanning 0..1.
#[test]
fn number_zero() {
    parses_to! {
        parser: JsonParser,
        input: "0",
        rule: Rule::number,
        tokens: [number(0, 1)]
    };
}
336
/// A number with a fractional part is consumed as one `number` token.
#[test]
fn float() {
    parses_to! {
        parser: JsonParser,
        input: "100.001",
        rule: Rule::number,
        tokens: [number(0, 7)]
    };
}
348
/// A fractional number followed by a signed exponent is one `number` token.
#[test]
fn float_with_exp() {
    parses_to! {
        parser: JsonParser,
        input: "100.001E+100",
        rule: Rule::number,
        tokens: [number(0, 12)]
    };
}
360
/// The leading minus sign is part of the `number` token.
#[test]
fn number_minus_zero() {
    parses_to! {
        parser: JsonParser,
        input: "-0",
        rule: Rule::number,
        tokens: [number(0, 2)]
    };
}
372
/// A string containing a `\u` escape and an escaped quote is one `string`
/// token covering the whole 13-byte input, surrounding quotes included.
#[test]
fn string_with_escapes() {
    parses_to! {
        parser: JsonParser,
        input: "\"asd\\u0000\\\"\"",
        rule: Rule::string,
        tokens: [string(0, 13)]
    };
}
384
/// An array holding only whitespace parses as an `array` token with no
/// children.
#[test]
fn array_empty() {
    parses_to! {
        parser: JsonParser,
        input: "[ ]",
        rule: Rule::array,
        tokens: [
            array(0, 3)
        ]
    };
}
396
/// A mixed array: every element is wrapped in a `value` token, whitespace
/// after the commas is not part of any token, and the nested array carries
/// its own `value` child.
#[test]
fn array() {
    parses_to! {
        parser: JsonParser,
        input: "[0.0e1, false, null, \"a\", [0]]",
        rule: Rule::array,
        tokens: [
            array(0, 30, [
                value(1, 6, [number(1, 6)]),
                value(8, 13, [bool(8, 13)]),
                value(15, 19, [null(15, 19)]),
                value(21, 24, [string(21, 24)]),
                value(26, 29, [
                    array(26, 29, [
                        value(27, 28, [number(27, 28)])
                    ])
                ])
            ])
        ]
    };
}
418
/// An object with two members: each member is a `pair` token made of the key
/// `string` and a `value`; the second value is an array that itself contains
/// an empty object.
#[test]
fn object() {
    parses_to! {
        parser: JsonParser,
        input: "{\"a\" : 3, \"b\" : [{}, 3]}",
        rule: Rule::object,
        tokens: [
            object(0, 24, [
                pair(1, 8, [
                    string(1, 4),
                    value(7, 8, [number(7, 8)])
                ]),
                pair(10, 23, [
                    string(10, 13),
                    value(16, 23, [
                        array(16, 23, [
                            value(17, 19, [object(17, 19)]),
                            value(21, 22, [number(21, 22)])
                        ])
                    ])
                ])
            ])
        ]
    };
}
444
/// Parses a small document, converts it with `consume`, and checks the
/// resulting tree.
///
/// The test fails loudly if the root is not an object, rather than silently
/// skipping its assertions, and it also checks the single member's key.
#[test]
fn ast() {
    let input = "{\"a\": [null, true, 3.4]}";

    let root = JsonParser::parse(Rule::json, input)
        .expect("input should parse as JSON")
        .next()
        .expect("a successful parse should yield a top-level pair");

    match consume(root) {
        Json::Object(members) => {
            assert_eq!(members.len(), 1);

            let (key, value) = members
                .iter()
                .next()
                .expect("object should have one member");

            // Keys are spans over the raw input, so the quotes are included.
            assert_eq!(key.as_str(), "\"a\"");
            assert_eq!(
                *value,
                Json::Array(vec![Json::Null, Json::Bool(true), Json::Number(3.4)])
            );
        }
        other => panic!("expected a JSON object, got {:?}", other),
    }
}
465