1 //
2 // Copyright 2015-2016 gRPC authors.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include <grpc/support/port_platform.h>
18
19 #include <inttypes.h>
20 #include <stdlib.h>
21
22 #include <algorithm>
23 #include <initializer_list>
24 #include <map>
25 #include <string>
26 #include <utility>
27 #include <vector>
28
29 #include "absl/base/attributes.h"
30 #include "absl/status/status.h"
31 #include "absl/status/statusor.h"
32 #include "absl/strings/str_cat.h"
33 #include "absl/strings/str_format.h"
34 #include "absl/strings/str_join.h"
35 #include "absl/strings/string_view.h"
36 #include "absl/types/variant.h"
37
38 #include <grpc/support/json.h>
39 #include <grpc/support/log.h>
40
41 #include "src/core/lib/gprpp/match.h"
42 #include "src/core/lib/json/json.h"
43
44 #define GRPC_JSON_MAX_DEPTH 255
45 #define GRPC_JSON_MAX_ERRORS 16
46
47 namespace grpc_core {
48
49 namespace {
50
51 class JsonReader {
52 public:
53 static absl::StatusOr<Json> Parse(absl::string_view input);
54
55 private:
56 enum class Status {
57 GRPC_JSON_DONE, // The parser finished successfully.
58 GRPC_JSON_PARSE_ERROR, // The parser found an error in the json stream.
59 GRPC_JSON_INTERNAL_ERROR // The parser got an internal error.
60 };
61
62 enum class State {
63 GRPC_JSON_STATE_OBJECT_KEY_BEGIN,
64 GRPC_JSON_STATE_OBJECT_KEY_STRING,
65 GRPC_JSON_STATE_OBJECT_KEY_END,
66 GRPC_JSON_STATE_VALUE_BEGIN,
67 GRPC_JSON_STATE_VALUE_STRING,
68 GRPC_JSON_STATE_STRING_ESCAPE,
69 GRPC_JSON_STATE_STRING_ESCAPE_U1,
70 GRPC_JSON_STATE_STRING_ESCAPE_U2,
71 GRPC_JSON_STATE_STRING_ESCAPE_U3,
72 GRPC_JSON_STATE_STRING_ESCAPE_U4,
73 GRPC_JSON_STATE_VALUE_NUMBER,
74 GRPC_JSON_STATE_VALUE_NUMBER_WITH_DECIMAL,
75 GRPC_JSON_STATE_VALUE_NUMBER_ZERO,
76 GRPC_JSON_STATE_VALUE_NUMBER_DOT,
77 GRPC_JSON_STATE_VALUE_NUMBER_E,
78 GRPC_JSON_STATE_VALUE_NUMBER_EPM,
79 GRPC_JSON_STATE_VALUE_TRUE_R,
80 GRPC_JSON_STATE_VALUE_TRUE_U,
81 GRPC_JSON_STATE_VALUE_TRUE_E,
82 GRPC_JSON_STATE_VALUE_FALSE_A,
83 GRPC_JSON_STATE_VALUE_FALSE_L,
84 GRPC_JSON_STATE_VALUE_FALSE_S,
85 GRPC_JSON_STATE_VALUE_FALSE_E,
86 GRPC_JSON_STATE_VALUE_NULL_U,
87 GRPC_JSON_STATE_VALUE_NULL_L1,
88 GRPC_JSON_STATE_VALUE_NULL_L2,
89 GRPC_JSON_STATE_VALUE_END,
90 GRPC_JSON_STATE_END
91 };
92
93 // The first non-unicode value is 0x110000. But let's pick
94 // a value high enough to start our error codes from. These
95 // values are safe to return from the read_char function.
96 //
97 static constexpr uint32_t GRPC_JSON_READ_CHAR_EOF = 0x7ffffff0;
98
99 struct Scope {
100 std::string parent_object_key;
101 absl::variant<Json::Object, Json::Array> data;
102
typegrpc_core::__anonaa3266910111::JsonReader::Scope103 Json::Type type() const {
104 return Match(
105 data, [](const Json::Object&) { return Json::Type::kObject; },
106 [](const Json::Array&) { return Json::Type::kArray; });
107 }
108
TakeAsJsongrpc_core::__anonaa3266910111::JsonReader::Scope109 Json TakeAsJson() {
110 return MatchMutable(
111 &data,
112 [&](Json::Object* object) {
113 return Json::FromObject(std::move(*object));
114 },
115 [&](Json::Array* array) {
116 return Json::FromArray(std::move(*array));
117 });
118 }
119 };
120
JsonReader(absl::string_view input)121 explicit JsonReader(absl::string_view input)
122 : original_input_(reinterpret_cast<const uint8_t*>(input.data())),
123 input_(original_input_),
124 remaining_input_(input.size()) {}
125
126 Status Run();
127 uint32_t ReadChar();
128 bool IsComplete();
129
CurrentIndex() const130 size_t CurrentIndex() const { return input_ - original_input_ - 1; }
131
132 GRPC_MUST_USE_RESULT bool StringAddChar(uint32_t c);
133 GRPC_MUST_USE_RESULT bool StringAddUtf32(uint32_t c);
134
135 Json* CreateAndLinkValue();
136 bool StartContainer(Json::Type type);
137 void EndContainer();
138 void SetKey();
139 void SetString();
140 bool SetNumber();
141 void SetTrue();
142 void SetFalse();
143 void SetNull();
144
145 const uint8_t* original_input_;
146 const uint8_t* input_;
147 size_t remaining_input_;
148
149 State state_ = State::GRPC_JSON_STATE_VALUE_BEGIN;
150 bool escaped_string_was_key_ = false;
151 bool container_just_begun_ = false;
152 uint16_t unicode_char_ = 0;
153 uint16_t unicode_high_surrogate_ = 0;
154 std::vector<std::string> errors_;
155 bool truncated_errors_ = false;
156 uint8_t utf8_bytes_remaining_ = 0;
157 uint8_t utf8_first_byte_ = 0;
158
159 Json root_value_;
160 std::vector<Scope> stack_;
161
162 std::string key_;
163 std::string string_;
164 };
165
StringAddChar(uint32_t c)166 bool JsonReader::StringAddChar(uint32_t c) {
167 if (utf8_bytes_remaining_ == 0) {
168 if ((c & 0x80) == 0) {
169 utf8_bytes_remaining_ = 0;
170 } else if ((c & 0xe0) == 0xc0 && c >= 0xc2) {
171 /// For the UTF-8 characters with length of 2 bytes, the range of the
172 /// first byte is [0xc2, 0xdf]. Reference: Table 3-7 in
173 /// https://www.unicode.org/versions/Unicode14.0.0/ch03.pdf
174 utf8_bytes_remaining_ = 1;
175 } else if ((c & 0xf0) == 0xe0) {
176 utf8_bytes_remaining_ = 2;
177 } else if ((c & 0xf8) == 0xf0 && c <= 0xf4) {
178 /// For the UTF-8 characters with length of 4 bytes, the range of the
179 /// first byte is [0xf0, 0xf4]. Reference: Table 3-7 in
180 /// https://www.unicode.org/versions/Unicode14.0.0/ch03.pdf
181 utf8_bytes_remaining_ = 3;
182 } else {
183 return false;
184 }
185 utf8_first_byte_ = c;
186 } else if (utf8_bytes_remaining_ == 1) {
187 if ((c & 0xc0) != 0x80) {
188 return false;
189 }
190 --utf8_bytes_remaining_;
191 } else if (utf8_bytes_remaining_ == 2) {
192 /// For UTF-8 characters starting with 0xe0, their length is 3 bytes, and
193 /// the range of the second byte is [0xa0, 0xbf]. For UTF-8 characters
194 /// starting with 0xed, their length is 3 bytes, and the range of the second
195 /// byte is [0x80, 0x9f]. Reference: Table 3-7 in
196 /// https://www.unicode.org/versions/Unicode14.0.0/ch03.pdf
197 if (((c & 0xc0) != 0x80) || (utf8_first_byte_ == 0xe0 && c < 0xa0) ||
198 (utf8_first_byte_ == 0xed && c > 0x9f)) {
199 return false;
200 }
201 --utf8_bytes_remaining_;
202 } else if (utf8_bytes_remaining_ == 3) {
203 /// For UTF-8 characters starting with 0xf0, their length is 4 bytes, and
204 /// the range of the second byte is [0x90, 0xbf]. For UTF-8 characters
205 /// starting with 0xf4, their length is 4 bytes, and the range of the second
206 /// byte is [0x80, 0x8f]. Reference: Table 3-7 in
207 /// https://www.unicode.org/versions/Unicode14.0.0/ch03.pdf
208 if (((c & 0xc0) != 0x80) || (utf8_first_byte_ == 0xf0 && c < 0x90) ||
209 (utf8_first_byte_ == 0xf4 && c > 0x8f)) {
210 return false;
211 }
212 --utf8_bytes_remaining_;
213 } else {
214 abort();
215 }
216
217 string_.push_back(static_cast<uint8_t>(c));
218 return true;
219 }
220
StringAddUtf32(uint32_t c)221 bool JsonReader::StringAddUtf32(uint32_t c) {
222 if (c <= 0x7f) {
223 return StringAddChar(c);
224 } else if (c <= 0x7ff) {
225 uint32_t b1 = 0xc0 | ((c >> 6) & 0x1f);
226 uint32_t b2 = 0x80 | (c & 0x3f);
227 return StringAddChar(b1) && StringAddChar(b2);
228 } else if (c <= 0xffff) {
229 uint32_t b1 = 0xe0 | ((c >> 12) & 0x0f);
230 uint32_t b2 = 0x80 | ((c >> 6) & 0x3f);
231 uint32_t b3 = 0x80 | (c & 0x3f);
232 return StringAddChar(b1) && StringAddChar(b2) && StringAddChar(b3);
233 } else if (c <= 0x1fffff) {
234 uint32_t b1 = 0xf0 | ((c >> 18) & 0x07);
235 uint32_t b2 = 0x80 | ((c >> 12) & 0x3f);
236 uint32_t b3 = 0x80 | ((c >> 6) & 0x3f);
237 uint32_t b4 = 0x80 | (c & 0x3f);
238 return StringAddChar(b1) && StringAddChar(b2) && StringAddChar(b3) &&
239 StringAddChar(b4);
240 } else {
241 return false;
242 }
243 }
244
ReadChar()245 uint32_t JsonReader::ReadChar() {
246 if (remaining_input_ == 0) return GRPC_JSON_READ_CHAR_EOF;
247 const uint32_t r = *input_++;
248 --remaining_input_;
249 if (r == 0) {
250 remaining_input_ = 0;
251 return GRPC_JSON_READ_CHAR_EOF;
252 }
253 return r;
254 }
255
CreateAndLinkValue()256 Json* JsonReader::CreateAndLinkValue() {
257 if (stack_.empty()) return &root_value_;
258 return MatchMutable(
259 &stack_.back().data,
260 [&](Json::Object* object) { return &(*object)[std::move(key_)]; },
261 [&](Json::Array* array) {
262 array->emplace_back();
263 return &array->back();
264 });
265 }
266
StartContainer(Json::Type type)267 bool JsonReader::StartContainer(Json::Type type) {
268 if (stack_.size() == GRPC_JSON_MAX_DEPTH) {
269 if (errors_.size() == GRPC_JSON_MAX_ERRORS) {
270 truncated_errors_ = true;
271 } else {
272 errors_.push_back(
273 absl::StrFormat("exceeded max stack depth (%d) at index %" PRIuPTR,
274 GRPC_JSON_MAX_DEPTH, CurrentIndex()));
275 }
276 return false;
277 }
278 stack_.emplace_back();
279 Scope& scope = stack_.back();
280 scope.parent_object_key = std::move(key_);
281 if (type == Json::Type::kObject) {
282 scope.data = Json::Object();
283 } else {
284 GPR_ASSERT(type == Json::Type::kArray);
285 scope.data = Json::Array();
286 }
287 return true;
288 }
289
EndContainer()290 void JsonReader::EndContainer() {
291 GPR_ASSERT(!stack_.empty());
292 Scope scope = std::move(stack_.back());
293 stack_.pop_back();
294 key_ = std::move(scope.parent_object_key);
295 Json* value = CreateAndLinkValue();
296 *value = scope.TakeAsJson();
297 }
298
SetKey()299 void JsonReader::SetKey() {
300 key_ = std::move(string_);
301 string_.clear();
302 const Json::Object& object = absl::get<Json::Object>(stack_.back().data);
303 if (object.find(key_) != object.end()) {
304 if (errors_.size() == GRPC_JSON_MAX_ERRORS) {
305 truncated_errors_ = true;
306 } else {
307 errors_.push_back(
308 absl::StrFormat("duplicate key \"%s\" at index %" PRIuPTR, key_,
309 CurrentIndex() - key_.size() - 2));
310 }
311 }
312 }
313
SetString()314 void JsonReader::SetString() {
315 Json* value = CreateAndLinkValue();
316 *value = Json::FromString(std::move(string_));
317 string_.clear();
318 }
319
SetNumber()320 bool JsonReader::SetNumber() {
321 Json* value = CreateAndLinkValue();
322 *value = Json::FromNumber(std::move(string_));
323 string_.clear();
324 return true;
325 }
326
SetTrue()327 void JsonReader::SetTrue() {
328 Json* value = CreateAndLinkValue();
329 *value = Json::FromBool(true);
330 string_.clear();
331 }
332
SetFalse()333 void JsonReader::SetFalse() {
334 Json* value = CreateAndLinkValue();
335 *value = Json::FromBool(false);
336 string_.clear();
337 }
338
SetNull()339 void JsonReader::SetNull() { CreateAndLinkValue(); }
340
IsComplete()341 bool JsonReader::IsComplete() {
342 return (stack_.empty() && (state_ == State::GRPC_JSON_STATE_END ||
343 state_ == State::GRPC_JSON_STATE_VALUE_END));
344 }
345
346 // Call this function to start parsing the input. It will return the following:
347 // . GRPC_JSON_DONE if the input got eof, and the parsing finished
348 // successfully.
349 // . GRPC_JSON_PARSE_ERROR if the input was somehow invalid.
350 // . GRPC_JSON_INTERNAL_ERROR if the parser somehow ended into an invalid
351 // internal state.
352 //
Run()353 JsonReader::Status JsonReader::Run() {
354 uint32_t c;
355
356 // This state-machine is a strict implementation of ECMA-404
357 while (true) {
358 c = ReadChar();
359 switch (c) {
360 // Let's process the error case first.
361 case GRPC_JSON_READ_CHAR_EOF:
362 switch (state_) {
363 case State::GRPC_JSON_STATE_VALUE_NUMBER:
364 case State::GRPC_JSON_STATE_VALUE_NUMBER_WITH_DECIMAL:
365 case State::GRPC_JSON_STATE_VALUE_NUMBER_ZERO:
366 case State::GRPC_JSON_STATE_VALUE_NUMBER_EPM:
367 if (!SetNumber()) return Status::GRPC_JSON_PARSE_ERROR;
368 state_ = State::GRPC_JSON_STATE_VALUE_END;
369 break;
370
371 default:
372 break;
373 }
374 if (IsComplete()) {
375 return Status::GRPC_JSON_DONE;
376 }
377 return Status::GRPC_JSON_PARSE_ERROR;
378
379 // Processing whitespaces.
380 case ' ':
381 case '\t':
382 case '\n':
383 case '\r':
384 switch (state_) {
385 case State::GRPC_JSON_STATE_OBJECT_KEY_BEGIN:
386 case State::GRPC_JSON_STATE_OBJECT_KEY_END:
387 case State::GRPC_JSON_STATE_VALUE_BEGIN:
388 case State::GRPC_JSON_STATE_VALUE_END:
389 case State::GRPC_JSON_STATE_END:
390 break;
391
392 case State::GRPC_JSON_STATE_OBJECT_KEY_STRING:
393 case State::GRPC_JSON_STATE_VALUE_STRING:
394 if (c != ' ') return Status::GRPC_JSON_PARSE_ERROR;
395 if (unicode_high_surrogate_ != 0) {
396 return Status::GRPC_JSON_PARSE_ERROR;
397 }
398 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
399 break;
400
401 case State::GRPC_JSON_STATE_VALUE_NUMBER:
402 case State::GRPC_JSON_STATE_VALUE_NUMBER_WITH_DECIMAL:
403 case State::GRPC_JSON_STATE_VALUE_NUMBER_ZERO:
404 case State::GRPC_JSON_STATE_VALUE_NUMBER_EPM:
405 if (!SetNumber()) return Status::GRPC_JSON_PARSE_ERROR;
406 state_ = State::GRPC_JSON_STATE_VALUE_END;
407 break;
408
409 default:
410 return Status::GRPC_JSON_PARSE_ERROR;
411 }
412 break;
413
414 // Value, object or array terminations.
415 case ',':
416 case '}':
417 case ']':
418 switch (state_) {
419 case State::GRPC_JSON_STATE_OBJECT_KEY_STRING:
420 case State::GRPC_JSON_STATE_VALUE_STRING:
421 if (unicode_high_surrogate_ != 0) {
422 return Status::GRPC_JSON_PARSE_ERROR;
423 }
424 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
425 break;
426
427 case State::GRPC_JSON_STATE_VALUE_NUMBER:
428 case State::GRPC_JSON_STATE_VALUE_NUMBER_WITH_DECIMAL:
429 case State::GRPC_JSON_STATE_VALUE_NUMBER_ZERO:
430 case State::GRPC_JSON_STATE_VALUE_NUMBER_EPM:
431 if (stack_.empty()) {
432 return Status::GRPC_JSON_PARSE_ERROR;
433 } else if (c == '}' &&
434 stack_.back().type() != Json::Type::kObject) {
435 return Status::GRPC_JSON_PARSE_ERROR;
436 } else if (c == ']' && stack_.back().type() != Json::Type::kArray) {
437 return Status::GRPC_JSON_PARSE_ERROR;
438 }
439 if (!SetNumber()) return Status::GRPC_JSON_PARSE_ERROR;
440 state_ = State::GRPC_JSON_STATE_VALUE_END;
441 ABSL_FALLTHROUGH_INTENDED;
442
443 case State::GRPC_JSON_STATE_VALUE_END:
444 case State::GRPC_JSON_STATE_OBJECT_KEY_BEGIN:
445 case State::GRPC_JSON_STATE_VALUE_BEGIN:
446 if (c == ',') {
447 if (state_ != State::GRPC_JSON_STATE_VALUE_END) {
448 return Status::GRPC_JSON_PARSE_ERROR;
449 }
450 if (!stack_.empty() &&
451 stack_.back().type() == Json::Type::kObject) {
452 state_ = State::GRPC_JSON_STATE_OBJECT_KEY_BEGIN;
453 } else if (!stack_.empty() &&
454 stack_.back().type() == Json::Type::kArray) {
455 state_ = State::GRPC_JSON_STATE_VALUE_BEGIN;
456 } else {
457 return Status::GRPC_JSON_PARSE_ERROR;
458 }
459 } else {
460 if (stack_.empty()) {
461 return Status::GRPC_JSON_PARSE_ERROR;
462 }
463 if (c == '}' && stack_.back().type() != Json::Type::kObject) {
464 return Status::GRPC_JSON_PARSE_ERROR;
465 }
466 if (c == '}' &&
467 state_ == State::GRPC_JSON_STATE_OBJECT_KEY_BEGIN &&
468 !container_just_begun_) {
469 return Status::GRPC_JSON_PARSE_ERROR;
470 }
471 if (c == ']' && stack_.back().type() != Json::Type::kArray) {
472 return Status::GRPC_JSON_PARSE_ERROR;
473 }
474 if (c == ']' && state_ == State::GRPC_JSON_STATE_VALUE_BEGIN &&
475 !container_just_begun_) {
476 return Status::GRPC_JSON_PARSE_ERROR;
477 }
478 state_ = State::GRPC_JSON_STATE_VALUE_END;
479 container_just_begun_ = false;
480 EndContainer();
481 if (stack_.empty()) {
482 state_ = State::GRPC_JSON_STATE_END;
483 }
484 }
485 break;
486
487 default:
488 return Status::GRPC_JSON_PARSE_ERROR;
489 }
490 break;
491
492 // In-string escaping.
493 case '\\':
494 switch (state_) {
495 case State::GRPC_JSON_STATE_OBJECT_KEY_STRING:
496 escaped_string_was_key_ = true;
497 state_ = State::GRPC_JSON_STATE_STRING_ESCAPE;
498 break;
499
500 case State::GRPC_JSON_STATE_VALUE_STRING:
501 escaped_string_was_key_ = false;
502 state_ = State::GRPC_JSON_STATE_STRING_ESCAPE;
503 break;
504
505 // This is the \\ case.
506 case State::GRPC_JSON_STATE_STRING_ESCAPE:
507 if (unicode_high_surrogate_ != 0) {
508 return Status::GRPC_JSON_PARSE_ERROR;
509 }
510 if (!StringAddChar('\\')) return Status::GRPC_JSON_PARSE_ERROR;
511 if (escaped_string_was_key_) {
512 state_ = State::GRPC_JSON_STATE_OBJECT_KEY_STRING;
513 } else {
514 state_ = State::GRPC_JSON_STATE_VALUE_STRING;
515 }
516 break;
517
518 default:
519 return Status::GRPC_JSON_PARSE_ERROR;
520 }
521 break;
522
523 default:
524 container_just_begun_ = false;
525 switch (state_) {
526 case State::GRPC_JSON_STATE_OBJECT_KEY_BEGIN:
527 if (c != '"') return Status::GRPC_JSON_PARSE_ERROR;
528 state_ = State::GRPC_JSON_STATE_OBJECT_KEY_STRING;
529 break;
530
531 case State::GRPC_JSON_STATE_OBJECT_KEY_STRING:
532 if (unicode_high_surrogate_ != 0) {
533 return Status::GRPC_JSON_PARSE_ERROR;
534 }
535 if (c == '"') {
536 state_ = State::GRPC_JSON_STATE_OBJECT_KEY_END;
537 // Once the key is parsed, there should no un-matched utf8
538 // encoded bytes.
539 if (utf8_bytes_remaining_ != 0) {
540 return Status::GRPC_JSON_PARSE_ERROR;
541 }
542 SetKey();
543 } else {
544 if (c < 32) return Status::GRPC_JSON_PARSE_ERROR;
545 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
546 }
547 break;
548
549 case State::GRPC_JSON_STATE_VALUE_STRING:
550 if (unicode_high_surrogate_ != 0) {
551 return Status::GRPC_JSON_PARSE_ERROR;
552 }
553 if (c == '"') {
554 state_ = State::GRPC_JSON_STATE_VALUE_END;
555 // Once the value is parsed, there should no un-matched utf8
556 // encoded bytes.
557 if (utf8_bytes_remaining_ != 0) {
558 return Status::GRPC_JSON_PARSE_ERROR;
559 }
560 SetString();
561 } else {
562 if (c < 32) return Status::GRPC_JSON_PARSE_ERROR;
563 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
564 }
565 break;
566
567 case State::GRPC_JSON_STATE_OBJECT_KEY_END:
568 if (c != ':') return Status::GRPC_JSON_PARSE_ERROR;
569 state_ = State::GRPC_JSON_STATE_VALUE_BEGIN;
570 break;
571
572 case State::GRPC_JSON_STATE_VALUE_BEGIN:
573 switch (c) {
574 case 't':
575 state_ = State::GRPC_JSON_STATE_VALUE_TRUE_R;
576 break;
577
578 case 'f':
579 state_ = State::GRPC_JSON_STATE_VALUE_FALSE_A;
580 break;
581
582 case 'n':
583 state_ = State::GRPC_JSON_STATE_VALUE_NULL_U;
584 break;
585
586 case '"':
587 state_ = State::GRPC_JSON_STATE_VALUE_STRING;
588 break;
589
590 case '0':
591 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
592 state_ = State::GRPC_JSON_STATE_VALUE_NUMBER_ZERO;
593 break;
594
595 case '1':
596 case '2':
597 case '3':
598 case '4':
599 case '5':
600 case '6':
601 case '7':
602 case '8':
603 case '9':
604 case '-':
605 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
606 state_ = State::GRPC_JSON_STATE_VALUE_NUMBER;
607 break;
608
609 case '{':
610 container_just_begun_ = true;
611 if (!StartContainer(Json::Type::kObject)) {
612 return Status::GRPC_JSON_PARSE_ERROR;
613 }
614 state_ = State::GRPC_JSON_STATE_OBJECT_KEY_BEGIN;
615 break;
616
617 case '[':
618 container_just_begun_ = true;
619 if (!StartContainer(Json::Type::kArray)) {
620 return Status::GRPC_JSON_PARSE_ERROR;
621 }
622 break;
623 default:
624 return Status::GRPC_JSON_PARSE_ERROR;
625 }
626 break;
627
628 case State::GRPC_JSON_STATE_STRING_ESCAPE:
629 if (escaped_string_was_key_) {
630 state_ = State::GRPC_JSON_STATE_OBJECT_KEY_STRING;
631 } else {
632 state_ = State::GRPC_JSON_STATE_VALUE_STRING;
633 }
634 if (unicode_high_surrogate_ && c != 'u') {
635 return Status::GRPC_JSON_PARSE_ERROR;
636 }
637 switch (c) {
638 case '"':
639 case '/':
640 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
641 break;
642 case 'b':
643 if (!StringAddChar('\b')) return Status::GRPC_JSON_PARSE_ERROR;
644 break;
645 case 'f':
646 if (!StringAddChar('\f')) return Status::GRPC_JSON_PARSE_ERROR;
647 break;
648 case 'n':
649 if (!StringAddChar('\n')) return Status::GRPC_JSON_PARSE_ERROR;
650 break;
651 case 'r':
652 if (!StringAddChar('\r')) return Status::GRPC_JSON_PARSE_ERROR;
653 break;
654 case 't':
655 if (!StringAddChar('\t')) return Status::GRPC_JSON_PARSE_ERROR;
656 break;
657 case 'u':
658 state_ = State::GRPC_JSON_STATE_STRING_ESCAPE_U1;
659 unicode_char_ = 0;
660 break;
661 default:
662 return Status::GRPC_JSON_PARSE_ERROR;
663 }
664 break;
665
666 case State::GRPC_JSON_STATE_STRING_ESCAPE_U1:
667 case State::GRPC_JSON_STATE_STRING_ESCAPE_U2:
668 case State::GRPC_JSON_STATE_STRING_ESCAPE_U3:
669 case State::GRPC_JSON_STATE_STRING_ESCAPE_U4:
670 if ((c >= '0') && (c <= '9')) {
671 c -= '0';
672 } else if ((c >= 'A') && (c <= 'F')) {
673 c -= 'A' - 10;
674 } else if ((c >= 'a') && (c <= 'f')) {
675 c -= 'a' - 10;
676 } else {
677 return Status::GRPC_JSON_PARSE_ERROR;
678 }
679 unicode_char_ = static_cast<uint16_t>(unicode_char_ << 4);
680 unicode_char_ = static_cast<uint16_t>(unicode_char_ | c);
681
682 switch (state_) {
683 case State::GRPC_JSON_STATE_STRING_ESCAPE_U1:
684 state_ = State::GRPC_JSON_STATE_STRING_ESCAPE_U2;
685 break;
686 case State::GRPC_JSON_STATE_STRING_ESCAPE_U2:
687 state_ = State::GRPC_JSON_STATE_STRING_ESCAPE_U3;
688 break;
689 case State::GRPC_JSON_STATE_STRING_ESCAPE_U3:
690 state_ = State::GRPC_JSON_STATE_STRING_ESCAPE_U4;
691 break;
692 case State::GRPC_JSON_STATE_STRING_ESCAPE_U4:
693 // See grpc_json_writer_escape_string to have a description
694 // of what's going on here.
695 //
696 if ((unicode_char_ & 0xfc00) == 0xd800) {
697 // high surrogate utf-16
698 if (unicode_high_surrogate_ != 0) {
699 return Status::GRPC_JSON_PARSE_ERROR;
700 }
701 unicode_high_surrogate_ = unicode_char_;
702 } else if ((unicode_char_ & 0xfc00) == 0xdc00) {
703 // low surrogate utf-16
704 uint32_t utf32;
705 if (unicode_high_surrogate_ == 0) {
706 return Status::GRPC_JSON_PARSE_ERROR;
707 }
708 utf32 = 0x10000;
709 utf32 += static_cast<uint32_t>(
710 (unicode_high_surrogate_ - 0xd800) * 0x400);
711 utf32 += static_cast<uint32_t>(unicode_char_ - 0xdc00);
712 if (!StringAddUtf32(utf32)) {
713 return Status::GRPC_JSON_PARSE_ERROR;
714 }
715 unicode_high_surrogate_ = 0;
716 } else {
717 // anything else
718 if (unicode_high_surrogate_ != 0) {
719 return Status::GRPC_JSON_PARSE_ERROR;
720 }
721 if (!StringAddUtf32(unicode_char_)) {
722 return Status::GRPC_JSON_PARSE_ERROR;
723 }
724 }
725 if (escaped_string_was_key_) {
726 state_ = State::GRPC_JSON_STATE_OBJECT_KEY_STRING;
727 } else {
728 state_ = State::GRPC_JSON_STATE_VALUE_STRING;
729 }
730 break;
731 default:
732 GPR_UNREACHABLE_CODE(return Status::GRPC_JSON_INTERNAL_ERROR);
733 }
734 break;
735
736 case State::GRPC_JSON_STATE_VALUE_NUMBER:
737 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
738 switch (c) {
739 case '0':
740 case '1':
741 case '2':
742 case '3':
743 case '4':
744 case '5':
745 case '6':
746 case '7':
747 case '8':
748 case '9':
749 break;
750 case 'e':
751 case 'E':
752 state_ = State::GRPC_JSON_STATE_VALUE_NUMBER_E;
753 break;
754 case '.':
755 state_ = State::GRPC_JSON_STATE_VALUE_NUMBER_DOT;
756 break;
757 default:
758 return Status::GRPC_JSON_PARSE_ERROR;
759 }
760 break;
761
762 case State::GRPC_JSON_STATE_VALUE_NUMBER_WITH_DECIMAL:
763 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
764 switch (c) {
765 case '0':
766 case '1':
767 case '2':
768 case '3':
769 case '4':
770 case '5':
771 case '6':
772 case '7':
773 case '8':
774 case '9':
775 break;
776 case 'e':
777 case 'E':
778 state_ = State::GRPC_JSON_STATE_VALUE_NUMBER_E;
779 break;
780 default:
781 return Status::GRPC_JSON_PARSE_ERROR;
782 }
783 break;
784
785 case State::GRPC_JSON_STATE_VALUE_NUMBER_ZERO:
786 if (c != '.') return Status::GRPC_JSON_PARSE_ERROR;
787 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
788 state_ = State::GRPC_JSON_STATE_VALUE_NUMBER_DOT;
789 break;
790
791 case State::GRPC_JSON_STATE_VALUE_NUMBER_DOT:
792 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
793 switch (c) {
794 case '0':
795 case '1':
796 case '2':
797 case '3':
798 case '4':
799 case '5':
800 case '6':
801 case '7':
802 case '8':
803 case '9':
804 state_ = State::GRPC_JSON_STATE_VALUE_NUMBER_WITH_DECIMAL;
805 break;
806 default:
807 return Status::GRPC_JSON_PARSE_ERROR;
808 }
809 break;
810
811 case State::GRPC_JSON_STATE_VALUE_NUMBER_E:
812 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
813 switch (c) {
814 case '0':
815 case '1':
816 case '2':
817 case '3':
818 case '4':
819 case '5':
820 case '6':
821 case '7':
822 case '8':
823 case '9':
824 case '+':
825 case '-':
826 state_ = State::GRPC_JSON_STATE_VALUE_NUMBER_EPM;
827 break;
828 default:
829 return Status::GRPC_JSON_PARSE_ERROR;
830 }
831 break;
832
833 case State::GRPC_JSON_STATE_VALUE_NUMBER_EPM:
834 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
835 switch (c) {
836 case '0':
837 case '1':
838 case '2':
839 case '3':
840 case '4':
841 case '5':
842 case '6':
843 case '7':
844 case '8':
845 case '9':
846 break;
847 default:
848 return Status::GRPC_JSON_PARSE_ERROR;
849 }
850 break;
851
852 case State::GRPC_JSON_STATE_VALUE_TRUE_R:
853 if (c != 'r') return Status::GRPC_JSON_PARSE_ERROR;
854 state_ = State::GRPC_JSON_STATE_VALUE_TRUE_U;
855 break;
856
857 case State::GRPC_JSON_STATE_VALUE_TRUE_U:
858 if (c != 'u') return Status::GRPC_JSON_PARSE_ERROR;
859 state_ = State::GRPC_JSON_STATE_VALUE_TRUE_E;
860 break;
861
862 case State::GRPC_JSON_STATE_VALUE_TRUE_E:
863 if (c != 'e') return Status::GRPC_JSON_PARSE_ERROR;
864 SetTrue();
865 state_ = State::GRPC_JSON_STATE_VALUE_END;
866 break;
867
868 case State::GRPC_JSON_STATE_VALUE_FALSE_A:
869 if (c != 'a') return Status::GRPC_JSON_PARSE_ERROR;
870 state_ = State::GRPC_JSON_STATE_VALUE_FALSE_L;
871 break;
872
873 case State::GRPC_JSON_STATE_VALUE_FALSE_L:
874 if (c != 'l') return Status::GRPC_JSON_PARSE_ERROR;
875 state_ = State::GRPC_JSON_STATE_VALUE_FALSE_S;
876 break;
877
878 case State::GRPC_JSON_STATE_VALUE_FALSE_S:
879 if (c != 's') return Status::GRPC_JSON_PARSE_ERROR;
880 state_ = State::GRPC_JSON_STATE_VALUE_FALSE_E;
881 break;
882
883 case State::GRPC_JSON_STATE_VALUE_FALSE_E:
884 if (c != 'e') return Status::GRPC_JSON_PARSE_ERROR;
885 SetFalse();
886 state_ = State::GRPC_JSON_STATE_VALUE_END;
887 break;
888
889 case State::GRPC_JSON_STATE_VALUE_NULL_U:
890 if (c != 'u') return Status::GRPC_JSON_PARSE_ERROR;
891 state_ = State::GRPC_JSON_STATE_VALUE_NULL_L1;
892 break;
893
894 case State::GRPC_JSON_STATE_VALUE_NULL_L1:
895 if (c != 'l') return Status::GRPC_JSON_PARSE_ERROR;
896 state_ = State::GRPC_JSON_STATE_VALUE_NULL_L2;
897 break;
898
899 case State::GRPC_JSON_STATE_VALUE_NULL_L2:
900 if (c != 'l') return Status::GRPC_JSON_PARSE_ERROR;
901 SetNull();
902 state_ = State::GRPC_JSON_STATE_VALUE_END;
903 break;
904
905 // All of the VALUE_END cases are handled in the specialized case
906 // above.
907 case State::GRPC_JSON_STATE_VALUE_END:
908 switch (c) {
909 case ',':
910 case '}':
911 case ']':
912 GPR_UNREACHABLE_CODE(return Status::GRPC_JSON_INTERNAL_ERROR);
913 break;
914
915 default:
916 return Status::GRPC_JSON_PARSE_ERROR;
917 }
918 break;
919
920 case State::GRPC_JSON_STATE_END:
921 return Status::GRPC_JSON_PARSE_ERROR;
922 }
923 }
924 }
925
926 GPR_UNREACHABLE_CODE(return Status::GRPC_JSON_INTERNAL_ERROR);
927 }
928
Parse(absl::string_view input)929 absl::StatusOr<Json> JsonReader::Parse(absl::string_view input) {
930 JsonReader reader(input);
931 Status status = reader.Run();
932 if (reader.truncated_errors_) {
933 reader.errors_.push_back(
934 "too many errors encountered during JSON parsing -- fix reported "
935 "errors and try again to see additional errors");
936 }
937 if (status == Status::GRPC_JSON_INTERNAL_ERROR) {
938 reader.errors_.push_back(absl::StrCat(
939 "internal error in JSON parser at index ", reader.CurrentIndex()));
940 } else if (status == Status::GRPC_JSON_PARSE_ERROR) {
941 reader.errors_.push_back(
942 absl::StrCat("JSON parse error at index ", reader.CurrentIndex()));
943 }
944 if (!reader.errors_.empty()) {
945 return absl::InvalidArgumentError(absl::StrCat(
946 "JSON parsing failed: [", absl::StrJoin(reader.errors_, "; "), "]"));
947 }
948 return std::move(reader.root_value_);
949 }
950
951 } // namespace
952
JsonParse(absl::string_view json_str)953 absl::StatusOr<Json> JsonParse(absl::string_view json_str) {
954 return JsonReader::Parse(json_str);
955 }
956
957 } // namespace grpc_core
958