#region Copyright notice and license
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endregion
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Text;

namespace Google.Protobuf
{
    /// <summary>
    /// Simple but strict JSON tokenizer, rigidly following RFC 7159.
    /// </summary>
    /// <remarks>
    /// <para>
    /// This tokenizer is stateful, and only returns "useful" tokens - names, values etc.
    /// It does not create tokens for the separator between names and values, or for the comma
    /// between values. It validates the token stream as it goes - so callers can assume that the
    /// tokens it produces are appropriate. For example, it would never produce "start object, end array."
    /// </para>
    /// <para>Implementation details: the base class handles single token push-back and object depth
    /// tracking; concrete subclasses only supply the raw token stream via <see cref="NextImpl"/>.</para>
    /// <para>Not thread-safe.</para>
    /// </remarks>
    internal abstract class JsonTokenizer
    {
        /// <summary>
        /// The single pushed-back token, or null if nothing is currently buffered.
        /// </summary>
        private JsonToken bufferedToken;

        /// <summary>
        /// Creates a tokenizer that reads from the given text reader.
        /// </summary>
        /// <param name="reader">The reader supplying the JSON text.</param>
        /// <returns>A tokenizer positioned at the start of the document.</returns>
        internal static JsonTokenizer FromTextReader(TextReader reader)
        {
            return new JsonTextTokenizer(reader);
        }

        /// <summary>
        /// Creates a tokenizer that first replays the given list of tokens, then continues reading
        /// from another tokenizer. Note that if the returned tokenizer is "pushed back", that does not push back
        /// on the continuation tokenizer, or vice versa. Care should be taken when using this method - it was
        /// created for the sake of Any parsing.
        /// </summary>
        /// <param name="tokens">The tokens to return first, in order.</param>
        /// <param name="continuation">The tokenizer consulted once <paramref name="tokens"/> is exhausted.</param>
        /// <returns>A tokenizer yielding the replayed tokens followed by the continuation's tokens.</returns>
        internal static JsonTokenizer FromReplayedTokens(IList<JsonToken> tokens, JsonTokenizer continuation)
        {
            return new JsonReplayTokenizer(tokens, continuation);
        }

        /// <summary>
        /// Returns the depth of the stack, purely in objects (not collections).
        /// Informally, this is the number of remaining unclosed '{' characters we have.
        /// </summary>
        /// <remarks>
        /// The value is adjusted only by <see cref="Next"/> and <see cref="PushBack"/> on this instance,
        /// so it reflects the tokens that have been handed out by this tokenizer.
        /// </remarks>
        internal int ObjectDepth { get; private set; }

        // TODO: Why do we allow a different token to be pushed back? It might be better to always remember the previous
        // token returned, and allow a parameterless Rewind() method (which could only be called once, just like the current PushBack).
        /// <summary>
        /// Pushes a token back so that it is returned by the next call to <see cref="Next"/>.
        /// Only a single token may be buffered at a time.
        /// </summary>
        /// <param name="token">The token to return from the next call to <see cref="Next"/>.</param>
        /// <exception cref="ArgumentNullException"><paramref name="token"/> is null</exception>
        /// <exception cref="InvalidOperationException">A token has already been pushed back and not yet re-read</exception>
        internal void PushBack(JsonToken token)
        {
            // Fail with a descriptive exception rather than a NullReferenceException further down.
            if (token == null)
            {
                throw new ArgumentNullException("token");
            }
            if (bufferedToken != null)
            {
                throw new InvalidOperationException("Can't push back twice");
            }
            bufferedToken = token;
            // Undo the depth adjustment that Next() applied when the token was handed out;
            // Next() will re-apply it when the buffered token is returned again.
            if (token.Type == JsonToken.TokenType.StartObject)
            {
                ObjectDepth--;
            }
            else if (token.Type == JsonToken.TokenType.EndObject)
            {
                ObjectDepth++;
            }
        }

        /// <summary>
        /// Returns the next JSON token in the stream. An EndDocument token is returned to indicate the end of the stream,
        /// after which point <c>Next()</c> should not be called again.
        /// </summary>
        /// <remarks>This implementation provides single-token buffering, and calls <see cref="NextImpl"/> if there is no buffered token.</remarks>
        /// <returns>The next token in the stream. This is never null.</returns>
        /// <exception cref="InvalidOperationException">This method is called after an EndDocument token has been returned</exception>
        /// <exception cref="InvalidJsonException">The input text does not comply with RFC 7159</exception>
        internal JsonToken Next()
        {
            JsonToken tokenToReturn;
            if (bufferedToken != null)
            {
                tokenToReturn = bufferedToken;
                bufferedToken = null;
            }
            else
            {
                tokenToReturn = NextImpl();
            }
            // Track object nesting for every token handed out, whether buffered or freshly read.
            if (tokenToReturn.Type == JsonToken.TokenType.StartObject)
            {
                ObjectDepth++;
            }
            else if (tokenToReturn.Type == JsonToken.TokenType.EndObject)
            {
                ObjectDepth--;
            }
            return tokenToReturn;
        }

        /// <summary>
        /// Returns the next JSON token in the stream, when requested by the base class. (The <see cref="Next"/> method delegates
        /// to this if it doesn't have a buffered token.)
        /// </summary>
        /// <exception cref="InvalidOperationException">This method is called after an EndDocument token has been returned</exception>
        /// <exception cref="InvalidJsonException">The input text does not comply with RFC 7159</exception>
        protected abstract JsonToken NextImpl();

        /// <summary>
        /// Skips the value we're about to read. This must only be called immediately after reading a property name.
        /// If the value is an object or an array, the complete object/array is skipped.
        /// </summary>
        internal void SkipValue()
        {
            // We'll assume that Next() makes sure that the end objects and end arrays are all valid.
            // All we care about is the total nesting depth we need to close.
            int depth = 0;

            // do/while rather than while loop so that we read at least one token.
            do
            {
                var token = Next();
                switch (token.Type)
                {
                    case JsonToken.TokenType.EndArray:
                    case JsonToken.TokenType.EndObject:
                        depth--;
                        break;
                    case JsonToken.TokenType.StartArray:
                    case JsonToken.TokenType.StartObject:
                        depth++;
                        break;
                }
            } while (depth != 0);
        }

        /// <summary>
        /// Tokenizer which first exhausts a list of tokens, then consults another tokenizer.
        /// </summary>
        private class JsonReplayTokenizer : JsonTokenizer
        {
            private readonly IList<JsonToken> tokens;
            private readonly JsonTokenizer nextTokenizer;
            private int nextTokenIndex;

            /// <summary>
            /// Creates a tokenizer replaying <paramref name="tokens"/> and then delegating to <paramref name="nextTokenizer"/>.
            /// </summary>
            internal JsonReplayTokenizer(IList<JsonToken> tokens, JsonTokenizer nextTokenizer)
            {
                this.tokens = tokens;
                this.nextTokenizer = nextTokenizer;
            }

            // FIXME: Object depth not maintained...
            // This instance's ObjectDepth starts at zero and only counts tokens returned through this
            // instance's Next(); once delegation starts, the continuation tokenizer additionally updates
            // its own ObjectDepth, so the two values are not kept in sync.
            /// <summary>
            /// Returns the next replayed token, or the continuation tokenizer's next token once the list is exhausted.
            /// </summary>
            protected override JsonToken NextImpl()
            {
                if (nextTokenIndex >= tokens.Count)
                {
                    return nextTokenizer.Next();
                }
                return tokens[nextTokenIndex++];
            }
        }

        /// <summary>
        /// Tokenizer which does all the *real* work of parsing JSON.
        /// </summary>
        private sealed class JsonTextTokenizer : JsonTokenizer
        {
            // The set of states in which a value is valid next token.
            private static readonly State ValueStates = State.ArrayStart | State.ArrayAfterComma | State.ObjectAfterColon | State.StartOfDocument;

            // Stack of the containers we are currently inside; the bottom entry is always Document.
            private readonly Stack<ContainerType> containerStack = new Stack<ContainerType>();
            private readonly PushBackReader reader;
            private State state;

            /// <summary>
            /// Creates a tokenizer over the given reader, positioned at the start of the document.
            /// </summary>
            internal JsonTextTokenizer(TextReader reader)
            {
                this.reader = new PushBackReader(reader);
                state = State.StartOfDocument;
                containerStack.Push(ContainerType.Document);
            }

            /// <summary>
            /// Reads characters until a complete token has been recognised, and returns it.
            /// </summary>
            /// <remarks>
            /// This method essentially just loops through characters skipping whitespace, validating and
            /// changing state (e.g. from ObjectBeforeColon to ObjectAfterColon)
            /// until it reaches something which will be a genuine token (e.g. a start object, or a value) at which point
            /// it returns the token. Although the method is large, it would be relatively hard to break down further... most
            /// of it is the large switch statement, which sometimes returns and sometimes doesn't.
            /// </remarks>
            /// <exception cref="InvalidOperationException">This method is called after an EndDocument token has been returned</exception>
            /// <exception cref="InvalidJsonException">The input text does not comply with RFC 7159</exception>
            protected override JsonToken NextImpl()
            {
                if (state == State.ReaderExhausted)
                {
                    throw new InvalidOperationException("Next() called after end of document");
                }
                while (true)
                {
                    var next = reader.Read();
                    if (next == null)
                    {
                        // End of input is only legal once the single top-level value is complete.
                        ValidateState(State.ExpectedEndOfDocument, "Unexpected end of document in state: ");
                        state = State.ReaderExhausted;
                        return JsonToken.EndDocument;
                    }
                    switch (next.Value)
                    {
                        // Skip whitespace between tokens
                        case ' ':
                        case '\t':
                        case '\r':
                        case '\n':
                            break;
                        case ':':
                            ValidateState(State.ObjectBeforeColon, "Invalid state to read a colon: ");
                            state = State.ObjectAfterColon;
                            break;
                        case ',':
                            ValidateState(State.ObjectAfterProperty | State.ArrayAfterValue, "Invalid state to read a comma: ");
                            state = state == State.ObjectAfterProperty ? State.ObjectAfterComma : State.ArrayAfterComma;
                            break;
                        case '"':
                            string stringValue = ReadString();
                            // A string where a property is expected is a name; anywhere else it must be a value.
                            if ((state & (State.ObjectStart | State.ObjectAfterComma)) != 0)
                            {
                                state = State.ObjectBeforeColon;
                                return JsonToken.Name(stringValue);
                            }
                            else
                            {
                                ValidateAndModifyStateForValue("Invalid state to read a double quote: ");
                                return JsonToken.Value(stringValue);
                            }
                        case '{':
                            ValidateState(ValueStates, "Invalid state to read an open brace: ");
                            state = State.ObjectStart;
                            containerStack.Push(ContainerType.Object);
                            return JsonToken.StartObject;
                        case '}':
                            ValidateState(State.ObjectAfterProperty | State.ObjectStart, "Invalid state to read a close brace: ");
                            PopContainer();
                            return JsonToken.EndObject;
                        case '[':
                            ValidateState(ValueStates, "Invalid state to read an open square bracket: ");
                            state = State.ArrayStart;
                            containerStack.Push(ContainerType.Array);
                            return JsonToken.StartArray;
                        case ']':
                            ValidateState(State.ArrayAfterValue | State.ArrayStart, "Invalid state to read a close square bracket: ");
                            PopContainer();
                            return JsonToken.EndArray;
                        case 'n': // Start of null
                            ConsumeLiteral("null");
                            ValidateAndModifyStateForValue("Invalid state to read a null literal: ");
                            return JsonToken.Null;
                        case 't': // Start of true
                            ConsumeLiteral("true");
                            ValidateAndModifyStateForValue("Invalid state to read a true literal: ");
                            return JsonToken.True;
                        case 'f': // Start of false
                            ConsumeLiteral("false");
                            ValidateAndModifyStateForValue("Invalid state to read a false literal: ");
                            return JsonToken.False;
                        case '-': // Start of a number
                        case '0':
                        case '1':
                        case '2':
                        case '3':
                        case '4':
                        case '5':
                        case '6':
                        case '7':
                        case '8':
                        case '9':
                            double number = ReadNumber(next.Value);
                            ValidateAndModifyStateForValue("Invalid state to read a number token: ");
                            return JsonToken.Value(number);
                        default:
                            // Go through the reader's factory like every other parse error, so that
                            // location information is included once the reader tracks it.
                            throw reader.CreateException("Invalid first character of token: " + next.Value);
                    }
                }
            }

            /// <summary>
            /// Throws if the current state is not one of the given valid states.
            /// </summary>
            /// <param name="validStates">Bitwise combination of the states that are acceptable.</param>
            /// <param name="errorPrefix">Message prefix; the current state is appended to it.</param>
            /// <exception cref="InvalidJsonException">The current state is not in <paramref name="validStates"/></exception>
            private void ValidateState(State validStates, string errorPrefix)
            {
                if ((validStates & state) == 0)
                {
                    throw reader.CreateException(errorPrefix + state);
                }
            }

            /// <summary>
            /// Reads a string token. It is assumed that the opening " has already been read.
            /// </summary>
            /// <returns>The unescaped content of the string, without the surrounding quotes.</returns>
            /// <exception cref="InvalidJsonException">The text ends early, contains a character below U+0020,
            /// an invalid escape sequence, or an unpaired surrogate</exception>
            private string ReadString()
            {
                var value = new StringBuilder();
                bool haveHighSurrogate = false;
                while (true)
                {
                    char c = reader.ReadOrFail("Unexpected end of text while reading string");
                    if (c < ' ')
                    {
                        throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in string literal: U+{0:x4}", (int) c));
                    }
                    if (c == '"')
                    {
                        // A high surrogate immediately before the closing quote has no partner.
                        if (haveHighSurrogate)
                        {
                            throw reader.CreateException("Invalid use of surrogate pair code units");
                        }
                        return value.ToString();
                    }
                    if (c == '\\')
                    {
                        c = ReadEscapedCharacter();
                    }
                    // TODO: Consider only allowing surrogate pairs that are either both escaped,
                    // or both not escaped. It would be a very odd text stream that contained a "lone" high surrogate
                    // followed by an escaped low surrogate or vice versa... and that couldn't even be represented in UTF-8.
                    // A low surrogate is required exactly when the previous code unit was a high surrogate.
                    if (haveHighSurrogate != char.IsLowSurrogate(c))
                    {
                        throw reader.CreateException("Invalid use of surrogate pair code units");
                    }
                    haveHighSurrogate = char.IsHighSurrogate(c);
                    value.Append(c);
                }
            }

            /// <summary>
            /// Reads an escaped character. It is assumed that the leading backslash has already been read.
            /// </summary>
            /// <returns>The character represented by the escape sequence.</returns>
            /// <exception cref="InvalidJsonException">The text ends early or the escape sequence is not recognised</exception>
            private char ReadEscapedCharacter()
            {
                char c = reader.ReadOrFail("Unexpected end of text while reading character escape sequence");
                switch (c)
                {
                    case 'n':
                        return '\n';
                    case '\\':
                        return '\\';
                    case 'b':
                        return '\b';
                    case 'f':
                        return '\f';
                    case 'r':
                        return '\r';
                    case 't':
                        return '\t';
                    case '"':
                        return '"';
                    case '/':
                        return '/';
                    case 'u':
                        return ReadUnicodeEscape();
                    default:
                        throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int) c));
                }
            }

            /// <summary>
            /// Reads an escaped Unicode 4-nybble hex sequence. It is assumed that the leading \u has already been read.
            /// </summary>
            /// <returns>The UTF-16 code unit denoted by the four hex digits.</returns>
            /// <exception cref="InvalidJsonException">The text ends early or a non-hex character is encountered</exception>
            private char ReadUnicodeEscape()
            {
                int result = 0;
                for (int i = 0; i < 4; i++)
                {
                    char c = reader.ReadOrFail("Unexpected end of text while reading Unicode escape sequence");
                    int nybble;
                    if (c >= '0' && c <= '9')
                    {
                        nybble = c - '0';
                    }
                    else if (c >= 'a' && c <= 'f')
                    {
                        nybble = c - 'a' + 10;
                    }
                    else if (c >= 'A' && c <= 'F')
                    {
                        nybble = c - 'A' + 10;
                    }
                    else
                    {
                        throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int) c));
                    }
                    result = (result << 4) + nybble;
                }
                return (char) result;
            }

            /// <summary>
            /// Consumes a text-only literal, throwing an exception if the read text doesn't match it.
            /// It is assumed that the first letter of the literal has already been read.
            /// </summary>
            /// <param name="text">The complete literal, including the already-consumed first character.</param>
            /// <exception cref="InvalidJsonException">The text ends early or does not match the literal</exception>
            private void ConsumeLiteral(string text)
            {
                // Start at 1: the caller has already consumed text[0].
                for (int i = 1; i < text.Length; i++)
                {
                    char? next = reader.Read();
                    if (next == null)
                    {
                        throw reader.CreateException("Unexpected end of text while reading literal token " + text);
                    }
                    if (next.Value != text[i])
                    {
                        throw reader.CreateException("Unexpected character while reading literal token " + text);
                    }
                }
            }

            /// <summary>
            /// Reads a numeric literal whose first character has already been consumed.
            /// </summary>
            /// <param name="initialCharacter">The already-consumed first character: '-' or a digit.</param>
            /// <returns>The parsed value.</returns>
            /// <exception cref="InvalidJsonException">The literal is malformed, or its value is out of range for a double</exception>
            private double ReadNumber(char initialCharacter)
            {
                StringBuilder builder = new StringBuilder();
                if (initialCharacter == '-')
                {
                    builder.Append("-");
                }
                else
                {
                    // Let ReadInt see the first digit so it can apply the leading-zero rule.
                    reader.PushBack(initialCharacter);
                }
                // Each method returns the character it read that doesn't belong in that part,
                // so we know what to do next, including pushing the character back at the end.
                // null is returned for "end of text".
                char? next = ReadInt(builder);
                if (next == '.')
                {
                    next = ReadFrac(builder);
                }
                if (next == 'e' || next == 'E')
                {
                    next = ReadExp(builder);
                }
                // If we read a character which wasn't part of the number, push it back so we can read it again
                // to parse the next token.
                if (next != null)
                {
                    reader.PushBack(next.Value);
                }

                // TODO: What exception should we throw if the value can't be represented as a double?
                try
                {
                    double result = double.Parse(builder.ToString(),
                        NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent,
                        CultureInfo.InvariantCulture);

                    // .NET Core 3.0 and later returns infinity if the number is too large or small to be represented.
                    // For compatibility with other Protobuf implementations the tokenizer should still throw.
                    if (double.IsInfinity(result))
                    {
                        throw reader.CreateException("Numeric value out of range: " + builder);
                    }

                    return result;
                }
                catch (OverflowException)
                {
                    throw reader.CreateException("Numeric value out of range: " + builder);
                }
            }

            /// <summary>
            /// Reads the integer part of a number into <paramref name="builder"/>.
            /// </summary>
            /// <returns>The first character after the integer part, or null at end of text.</returns>
            /// <exception cref="InvalidJsonException">There is no leading digit, or a leading 0 is followed by more digits</exception>
            private char? ReadInt(StringBuilder builder)
            {
                char first = reader.ReadOrFail("Invalid numeric literal");
                if (first < '0' || first > '9')
                {
                    throw reader.CreateException("Invalid numeric literal");
                }
                builder.Append(first);
                int digitCount;
                char? next = ConsumeDigits(builder, out digitCount);
                if (first == '0' && digitCount != 0)
                {
                    throw reader.CreateException("Invalid numeric literal: leading 0 for non-zero value.");
                }
                return next;
            }

            /// <summary>
            /// Reads the fractional part of a number into <paramref name="builder"/>. The '.' has already been consumed.
            /// </summary>
            /// <returns>The first character after the fraction digits, or null at end of text.</returns>
            /// <exception cref="InvalidJsonException">No digits follow the decimal point</exception>
            private char? ReadFrac(StringBuilder builder)
            {
                builder.Append('.'); // Already consumed this
                int digitCount;
                char? next = ConsumeDigits(builder, out digitCount);
                if (digitCount == 0)
                {
                    throw reader.CreateException("Invalid numeric literal: fraction with no trailing digits");
                }
                return next;
            }

            /// <summary>
            /// Reads the exponent part of a number into <paramref name="builder"/>. The 'e'/'E' has already been consumed.
            /// </summary>
            /// <returns>The first character after the exponent digits, or null at end of text.</returns>
            /// <exception cref="InvalidJsonException">The exponent has no digits</exception>
            private char? ReadExp(StringBuilder builder)
            {
                builder.Append('E'); // Already consumed this (or 'e')
                char? next = reader.Read();
                if (next == null)
                {
                    throw reader.CreateException("Invalid numeric literal: exponent with no trailing digits");
                }
                if (next == '-' || next == '+')
                {
                    builder.Append(next.Value);
                }
                else
                {
                    // Not a sign, so it should be the first exponent digit: let ConsumeDigits read it.
                    reader.PushBack(next.Value);
                }
                int digitCount;
                next = ConsumeDigits(builder, out digitCount);
                if (digitCount == 0)
                {
                    throw reader.CreateException("Invalid numeric literal: exponent without value");
                }
                return next;
            }

            /// <summary>
            /// Appends consecutive ASCII digits from the reader to <paramref name="builder"/>.
            /// </summary>
            /// <param name="builder">The builder receiving the digits.</param>
            /// <param name="count">Set to the number of digits appended.</param>
            /// <returns>The first non-digit character read (already consumed from the reader), or null at end of text.</returns>
            private char? ConsumeDigits(StringBuilder builder, out int count)
            {
                count = 0;
                while (true)
                {
                    char? next = reader.Read();
                    if (next == null || next.Value < '0' || next.Value > '9')
                    {
                        return next;
                    }
                    count++;
                    builder.Append(next.Value);
                }
            }

            /// <summary>
            /// Validates that we're in a valid state to read a value (using the given error prefix if necessary)
            /// and changes the state to the appropriate one, e.g. ObjectAfterColon to ObjectAfterProperty.
            /// </summary>
            /// <exception cref="InvalidJsonException">A value is not valid in the current state</exception>
            private void ValidateAndModifyStateForValue(string errorPrefix)
            {
                ValidateState(ValueStates, errorPrefix);
                switch (state)
                {
                    case State.StartOfDocument:
                        state = State.ExpectedEndOfDocument;
                        return;
                    case State.ObjectAfterColon:
                        state = State.ObjectAfterProperty;
                        return;
                    case State.ArrayStart:
                    case State.ArrayAfterComma:
                        state = State.ArrayAfterValue;
                        return;
                    default:
                        // Only reachable if ValueStates gains a member that is not handled above.
                        throw new InvalidOperationException("ValidateAndModifyStateForValue does not handle all value states (and should)");
                }
            }

            /// <summary>
            /// Pops the top-most container, and sets the state to the appropriate one for the end of a value
            /// in the parent container.
            /// </summary>
            private void PopContainer()
            {
                containerStack.Pop();
                var parent = containerStack.Peek();
                switch (parent)
                {
                    case ContainerType.Object:
                        state = State.ObjectAfterProperty;
                        break;
                    case ContainerType.Array:
                        state = State.ArrayAfterValue;
                        break;
                    case ContainerType.Document:
                        state = State.ExpectedEndOfDocument;
                        break;
                    default:
                        throw new InvalidOperationException("Unexpected container type: " + parent);
                }
            }

            /// <summary>
            /// The kinds of container tracked on the container stack.
            /// </summary>
            private enum ContainerType
            {
                Document, Object, Array
            }

            /// <summary>
            /// Possible states of the tokenizer.
            /// </summary>
            /// <remarks>
            /// <para>This is a flags enum purely so we can simply and efficiently represent a set of valid states
            /// for checking.</para>
            /// <para>
            /// Each is documented with an example,
            /// where ^ represents the current position within the text stream. The examples all use string values,
            /// but could be any value, including nested objects/arrays.
            /// The complete state of the tokenizer also includes a stack to indicate the contexts (arrays/objects).
            /// Any additional notional state of "AfterValue" indicates that a value has been completed, at which
            /// point there's an immediate transition to ExpectedEndOfDocument, ObjectAfterProperty or ArrayAfterValue.
            /// </para>
            /// <para>
            /// These states were derived manually by reading RFC 7159 carefully.
            /// </para>
            /// </remarks>
            [Flags]
            private enum State
            {
                /// <summary>
                /// ^ { "foo": "bar" }
                /// Before the value in a document. Next states: ObjectStart, ArrayStart, "AfterValue"
                /// </summary>
                StartOfDocument = 1 << 0,
                /// <summary>
                /// { "foo": "bar" } ^
                /// After the value in a document. Next states: ReaderExhausted
                /// </summary>
                ExpectedEndOfDocument = 1 << 1,
                /// <summary>
                /// { "foo": "bar" } ^ (and already read to the end of the reader)
                /// Terminal state.
                /// </summary>
                ReaderExhausted = 1 << 2,
                /// <summary>
                /// { ^ "foo": "bar" }
                /// Before the *first* property in an object.
                /// Next states:
                /// "AfterValue" (empty object)
                /// ObjectBeforeColon (read a name)
                /// </summary>
                ObjectStart = 1 << 3,
                /// <summary>
                /// { "foo" ^ : "bar", "x": "y" }
                /// Next state: ObjectAfterColon
                /// </summary>
                ObjectBeforeColon = 1 << 4,
                /// <summary>
                /// { "foo" : ^ "bar", "x": "y" }
                /// After the colon following a property name, so expecting the property's value.
                /// Next states:
                /// "AfterValue" (value is simple)
                /// ObjectStart (value is object)
                /// ArrayStart (value is array)
                /// </summary>
                ObjectAfterColon = 1 << 5,
                /// <summary>
                /// { "foo" : "bar" ^ , "x" : "y" }
                /// At the end of a property, so expecting either a comma or end-of-object
                /// Next states: ObjectAfterComma or "AfterValue"
                /// </summary>
                ObjectAfterProperty = 1 << 6,
                /// <summary>
                /// { "foo":"bar", ^ "x":"y" }
                /// Read the comma after the previous property, so expecting another property.
                /// This is like ObjectStart, but closing brace isn't valid here
                /// Next state: ObjectBeforeColon.
                /// </summary>
                ObjectAfterComma = 1 << 7,
                /// <summary>
                /// [ ^ "foo", "bar" ]
                /// Before the *first* value in an array.
                /// Next states:
                /// "AfterValue" (read a value)
                /// "AfterValue" (end of array; will pop stack)
                /// </summary>
                ArrayStart = 1 << 8,
                /// <summary>
                /// [ "foo" ^ , "bar" ]
                /// After any value in an array, so expecting either a comma or end-of-array
                /// Next states: ArrayAfterComma or "AfterValue"
                /// </summary>
                ArrayAfterValue = 1 << 9,
                /// <summary>
                /// [ "foo", ^ "bar" ]
                /// After a comma in an array, so there *must* be another value (simple or complex).
                /// Next states: "AfterValue" (simple value), ObjectStart, ArrayStart
                /// </summary>
                ArrayAfterComma = 1 << 10
            }

            /// <summary>
            /// Wrapper around a text reader allowing small amounts of buffering and location handling.
            /// </summary>
            private class PushBackReader
            {
                // TODO: Add locations for errors etc.

                private readonly TextReader reader;

                /// <summary>
                /// Wraps the given reader; no characters are read until <see cref="Read"/> is called.
                /// </summary>
                internal PushBackReader(TextReader reader)
                {
                    // TODO: Wrap the reader in a BufferedReader?
                    this.reader = reader;
                }

                /// <summary>
                /// The buffered next character, if we have one.
                /// </summary>
                private char? nextChar;

                /// <summary>
                /// Returns the next character in the stream, or null if we have reached the end.
                /// </summary>
                /// <returns>The pushed-back character if there is one, otherwise the next character from the
                /// underlying reader; null once the underlying reader reports end of input.</returns>
                internal char? Read()
                {
                    if (nextChar != null)
                    {
                        char? tmp = nextChar;
                        nextChar = null;
                        return tmp;
                    }
                    int next = reader.Read();
                    return next == -1 ? null : (char?) next;
                }

                /// <summary>
                /// Returns the next character in the stream, throwing if we have reached the end.
                /// </summary>
                /// <param name="messageOnFailure">The exception message to use at end of input.</param>
                /// <exception cref="InvalidJsonException">There are no more characters to read</exception>
                internal char ReadOrFail(string messageOnFailure)
                {
                    char? next = Read();
                    if (next == null)
                    {
                        throw CreateException(messageOnFailure);
                    }
                    return next.Value;
                }

                /// <summary>
                /// Pushes a single character back so that it is returned by the next call to <see cref="Read"/>.
                /// </summary>
                /// <exception cref="InvalidOperationException">A character is already buffered</exception>
                internal void PushBack(char c)
                {
                    if (nextChar != null)
                    {
                        throw new InvalidOperationException("Cannot push back when already buffering a character");
                    }
                    nextChar = c;
                }

                /// <summary>
                /// Creates a new exception appropriate for the current state of the reader.
                /// </summary>
                internal InvalidJsonException CreateException(string message)
                {
                    // TODO: Keep track of and use the location.
                    return new InvalidJsonException(message);
                }
            }
        }
    }
}