xref: /aosp_15_r20/external/cronet/third_party/protobuf/csharp/src/Google.Protobuf/JsonTokenizer.cs (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 #region Copyright notice and license
2 // Protocol Buffers - Google's data interchange format
3 // Copyright 2008 Google Inc.  All rights reserved.
4 // https://developers.google.com/protocol-buffers/
5 //
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are
8 // met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following disclaimer
14 // in the documentation and/or other materials provided with the
15 // distribution.
16 //     * Neither the name of Google Inc. nor the names of its
17 // contributors may be used to endorse or promote products derived from
18 // this software without specific prior written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 #endregion
32 using System;
33 using System.Collections.Generic;
34 using System.Globalization;
35 using System.IO;
36 using System.Text;
37 
38 namespace Google.Protobuf
39 {
40     /// <summary>
41     /// Simple but strict JSON tokenizer, rigidly following RFC 7159.
42     /// </summary>
43     /// <remarks>
44     /// <para>
45     /// This tokenizer is stateful, and only returns "useful" tokens - names, values etc.
46     /// It does not create tokens for the separator between names and values, or for the comma
47     /// between values. It validates the token stream as it goes - so callers can assume that the
48     /// tokens it produces are appropriate. For example, it would never produce "start object, end array."
49     /// </para>
50     /// <para>Implementation details: the base class handles single token push-back and object depth tracking; derived classes supply each new token by implementing <c>NextImpl</c>.</para>
51     /// <para>Not thread-safe.</para>
52     /// </remarks>
53     internal abstract class JsonTokenizer
54     {
55         private JsonToken bufferedToken;
56 
57         /// <summary>
58         ///  Creates a tokenizer that reads from the given text reader.
59         /// </summary>
FromTextReader(TextReader reader)60         internal static JsonTokenizer FromTextReader(TextReader reader)
61         {
62             return new JsonTextTokenizer(reader);
63         }
64 
65         /// <summary>
66         /// Creates a tokenizer that first replays the given list of tokens, then continues reading
67         /// from another tokenizer. Note that if the returned tokenizer is "pushed back", that does not push back
68         /// on the continuation tokenizer, or vice versa. Care should be taken when using this method - it was
69         /// created for the sake of Any parsing.
70         /// </summary>
FromReplayedTokens(IList<JsonToken> tokens, JsonTokenizer continuation)71         internal static JsonTokenizer FromReplayedTokens(IList<JsonToken> tokens, JsonTokenizer continuation)
72         {
73             return new JsonReplayTokenizer(tokens, continuation);
74         }
75 
76         /// <summary>
77         /// Returns the depth of the stack, purely in objects (not collections).
78         /// Informally, this is the number of remaining unclosed '{' characters we have.
79         /// </summary>
80         internal int ObjectDepth { get; private set; }
81 
82         // TODO: Why do we allow a different token to be pushed back? It might be better to always remember the previous
83         // token returned, and allow a parameterless Rewind() method (which could only be called once, just like the current PushBack).
PushBack(JsonToken token)84         internal void PushBack(JsonToken token)
85         {
86             if (bufferedToken != null)
87             {
88                 throw new InvalidOperationException("Can't push back twice");
89             }
90             bufferedToken = token;
91             if (token.Type == JsonToken.TokenType.StartObject)
92             {
93                 ObjectDepth--;
94             }
95             else if (token.Type == JsonToken.TokenType.EndObject)
96             {
97                 ObjectDepth++;
98             }
99         }
100 
101         /// <summary>
102         /// Returns the next JSON token in the stream. An EndDocument token is returned to indicate the end of the stream,
103         /// after which point <c>Next()</c> should not be called again.
104         /// </summary>
105         /// <remarks>This implementation provides single-token buffering, and calls <see cref="NextImpl"/> if there is no buffered token.</remarks>
106         /// <returns>The next token in the stream. This is never null.</returns>
107         /// <exception cref="InvalidOperationException">This method is called after an EndDocument token has been returned</exception>
108         /// <exception cref="InvalidJsonException">The input text does not comply with RFC 7159</exception>
Next()109         internal JsonToken Next()
110         {
111             JsonToken tokenToReturn;
112             if (bufferedToken != null)
113             {
114                 tokenToReturn = bufferedToken;
115                 bufferedToken = null;
116             }
117             else
118             {
119                 tokenToReturn = NextImpl();
120             }
121             if (tokenToReturn.Type == JsonToken.TokenType.StartObject)
122             {
123                 ObjectDepth++;
124             }
125             else if (tokenToReturn.Type == JsonToken.TokenType.EndObject)
126             {
127                 ObjectDepth--;
128             }
129             return tokenToReturn;
130         }
131 
132         /// <summary>
133         /// Returns the next JSON token in the stream, when requested by the base class. (The <see cref="Next"/> method delegates
134         /// to this if it doesn't have a buffered token.)
135         /// </summary>
136         /// <exception cref="InvalidOperationException">This method is called after an EndDocument token has been returned</exception>
137         /// <exception cref="InvalidJsonException">The input text does not comply with RFC 7159</exception>
NextImpl()138         protected abstract JsonToken NextImpl();
139 
140         /// <summary>
141         /// Skips the value we're about to read. This must only be called immediately after reading a property name.
142         /// If the value is an object or an array, the complete object/array is skipped.
143         /// </summary>
SkipValue()144         internal void SkipValue()
145         {
146             // We'll assume that Next() makes sure that the end objects and end arrays are all valid.
147             // All we care about is the total nesting depth we need to close.
148             int depth = 0;
149 
150             // do/while rather than while loop so that we read at least one token.
151             do
152             {
153                 var token = Next();
154                 switch (token.Type)
155                 {
156                     case JsonToken.TokenType.EndArray:
157                     case JsonToken.TokenType.EndObject:
158                         depth--;
159                         break;
160                     case JsonToken.TokenType.StartArray:
161                     case JsonToken.TokenType.StartObject:
162                         depth++;
163                         break;
164                 }
165             } while (depth != 0);
166         }
167 
168         /// <summary>
169         /// Tokenizer which first exhausts a list of tokens, then consults another tokenizer.
170         /// </summary>
171         private class JsonReplayTokenizer : JsonTokenizer
172         {
173             private readonly IList<JsonToken> tokens;
174             private readonly JsonTokenizer nextTokenizer;
175             private int nextTokenIndex;
176 
JsonReplayTokenizer(IList<JsonToken> tokens, JsonTokenizer nextTokenizer)177             internal JsonReplayTokenizer(IList<JsonToken> tokens, JsonTokenizer nextTokenizer)
178             {
179                 this.tokens = tokens;
180                 this.nextTokenizer = nextTokenizer;
181             }
182 
183             // FIXME: Object depth not maintained...
NextImpl()184             protected override JsonToken NextImpl()
185             {
186                 if (nextTokenIndex >= tokens.Count)
187                 {
188                     return nextTokenizer.Next();
189                 }
190                 return tokens[nextTokenIndex++];
191             }
192         }
193 
194         /// <summary>
195         /// Tokenizer which does all the *real* work of parsing JSON.
196         /// </summary>
197         private sealed class JsonTextTokenizer : JsonTokenizer
198         {
199             // The set of states in which a value is valid next token.
200             private static readonly State ValueStates = State.ArrayStart | State.ArrayAfterComma | State.ObjectAfterColon | State.StartOfDocument;
201 
202             private readonly Stack<ContainerType> containerStack = new Stack<ContainerType>();
203             private readonly PushBackReader reader;
204             private State state;
205 
JsonTextTokenizer(TextReader reader)206             internal JsonTextTokenizer(TextReader reader)
207             {
208                 this.reader = new PushBackReader(reader);
209                 state = State.StartOfDocument;
210                 containerStack.Push(ContainerType.Document);
211             }
212 
213             /// <remarks>
214             /// This method essentially just loops through characters skipping whitespace, validating and
215             /// changing state (e.g. from ObjectBeforeColon to ObjectAfterColon)
216             /// until it reaches something which will be a genuine token (e.g. a start object, or a value) at which point
217             /// it returns the token. Although the method is large, it would be relatively hard to break down further... most
218             /// of it is the large switch statement, which sometimes returns and sometimes doesn't.
219             /// </remarks>
NextImpl()220             protected override JsonToken NextImpl()
221             {
222                 if (state == State.ReaderExhausted)
223                 {
224                     throw new InvalidOperationException("Next() called after end of document");
225                 }
226                 while (true)
227                 {
228                     var next = reader.Read();
229                     if (next == null)
230                     {
231                         ValidateState(State.ExpectedEndOfDocument, "Unexpected end of document in state: ");
232                         state = State.ReaderExhausted;
233                         return JsonToken.EndDocument;
234                     }
235                     switch (next.Value)
236                     {
237                         // Skip whitespace between tokens
238                         case ' ':
239                         case '\t':
240                         case '\r':
241                         case '\n':
242                             break;
243                         case ':':
244                             ValidateState(State.ObjectBeforeColon, "Invalid state to read a colon: ");
245                             state = State.ObjectAfterColon;
246                             break;
247                         case ',':
248                             ValidateState(State.ObjectAfterProperty | State.ArrayAfterValue, "Invalid state to read a comma: ");
249                             state = state == State.ObjectAfterProperty ? State.ObjectAfterComma : State.ArrayAfterComma;
250                             break;
251                         case '"':
252                             string stringValue = ReadString();
253                             if ((state & (State.ObjectStart | State.ObjectAfterComma)) != 0)
254                             {
255                                 state = State.ObjectBeforeColon;
256                                 return JsonToken.Name(stringValue);
257                             }
258                             else
259                             {
260                                 ValidateAndModifyStateForValue("Invalid state to read a double quote: ");
261                                 return JsonToken.Value(stringValue);
262                             }
263                         case '{':
264                             ValidateState(ValueStates, "Invalid state to read an open brace: ");
265                             state = State.ObjectStart;
266                             containerStack.Push(ContainerType.Object);
267                             return JsonToken.StartObject;
268                         case '}':
269                             ValidateState(State.ObjectAfterProperty | State.ObjectStart, "Invalid state to read a close brace: ");
270                             PopContainer();
271                             return JsonToken.EndObject;
272                         case '[':
273                             ValidateState(ValueStates, "Invalid state to read an open square bracket: ");
274                             state = State.ArrayStart;
275                             containerStack.Push(ContainerType.Array);
276                             return JsonToken.StartArray;
277                         case ']':
278                             ValidateState(State.ArrayAfterValue | State.ArrayStart, "Invalid state to read a close square bracket: ");
279                             PopContainer();
280                             return JsonToken.EndArray;
281                         case 'n': // Start of null
282                             ConsumeLiteral("null");
283                             ValidateAndModifyStateForValue("Invalid state to read a null literal: ");
284                             return JsonToken.Null;
285                         case 't': // Start of true
286                             ConsumeLiteral("true");
287                             ValidateAndModifyStateForValue("Invalid state to read a true literal: ");
288                             return JsonToken.True;
289                         case 'f': // Start of false
290                             ConsumeLiteral("false");
291                             ValidateAndModifyStateForValue("Invalid state to read a false literal: ");
292                             return JsonToken.False;
293                         case '-': // Start of a number
294                         case '0':
295                         case '1':
296                         case '2':
297                         case '3':
298                         case '4':
299                         case '5':
300                         case '6':
301                         case '7':
302                         case '8':
303                         case '9':
304                             double number = ReadNumber(next.Value);
305                             ValidateAndModifyStateForValue("Invalid state to read a number token: ");
306                             return JsonToken.Value(number);
307                         default:
308                             throw new InvalidJsonException("Invalid first character of token: " + next.Value);
309                     }
310                 }
311             }
312 
ValidateState(State validStates, string errorPrefix)313             private void ValidateState(State validStates, string errorPrefix)
314             {
315                 if ((validStates & state) == 0)
316                 {
317                     throw reader.CreateException(errorPrefix + state);
318                 }
319             }
320 
321             /// <summary>
322             /// Reads a string token. It is assumed that the opening " has already been read.
323             /// </summary>
ReadString()324             private string ReadString()
325             {
326                 var value = new StringBuilder();
327                 bool haveHighSurrogate = false;
328                 while (true)
329                 {
330                     char c = reader.ReadOrFail("Unexpected end of text while reading string");
331                     if (c < ' ')
332                     {
333                         throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in string literal: U+{0:x4}", (int) c));
334                     }
335                     if (c == '"')
336                     {
337                         if (haveHighSurrogate)
338                         {
339                             throw reader.CreateException("Invalid use of surrogate pair code units");
340                         }
341                         return value.ToString();
342                     }
343                     if (c == '\\')
344                     {
345                         c = ReadEscapedCharacter();
346                     }
347                     // TODO: Consider only allowing surrogate pairs that are either both escaped,
348                     // or both not escaped. It would be a very odd text stream that contained a "lone" high surrogate
349                     // followed by an escaped low surrogate or vice versa... and that couldn't even be represented in UTF-8.
350                     if (haveHighSurrogate != char.IsLowSurrogate(c))
351                     {
352                         throw reader.CreateException("Invalid use of surrogate pair code units");
353                     }
354                     haveHighSurrogate = char.IsHighSurrogate(c);
355                     value.Append(c);
356                 }
357             }
358 
359             /// <summary>
360             /// Reads an escaped character. It is assumed that the leading backslash has already been read.
361             /// </summary>
ReadEscapedCharacter()362             private char ReadEscapedCharacter()
363             {
364                 char c = reader.ReadOrFail("Unexpected end of text while reading character escape sequence");
365                 switch (c)
366                 {
367                     case 'n':
368                         return '\n';
369                     case '\\':
370                         return '\\';
371                     case 'b':
372                         return '\b';
373                     case 'f':
374                         return '\f';
375                     case 'r':
376                         return '\r';
377                     case 't':
378                         return '\t';
379                     case '"':
380                         return '"';
381                     case '/':
382                         return '/';
383                     case 'u':
384                         return ReadUnicodeEscape();
385                     default:
386                         throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int) c));
387                 }
388             }
389 
390             /// <summary>
391             /// Reads an escaped Unicode 4-nybble hex sequence. It is assumed that the leading \u has already been read.
392             /// </summary>
ReadUnicodeEscape()393             private char ReadUnicodeEscape()
394             {
395                 int result = 0;
396                 for (int i = 0; i < 4; i++)
397                 {
398                     char c = reader.ReadOrFail("Unexpected end of text while reading Unicode escape sequence");
399                     int nybble;
400                     if (c >= '0' && c <= '9')
401                     {
402                         nybble = c - '0';
403                     }
404                     else if (c >= 'a' && c <= 'f')
405                     {
406                         nybble = c - 'a' + 10;
407                     }
408                     else if (c >= 'A' && c <= 'F')
409                     {
410                         nybble = c - 'A' + 10;
411                     }
412                     else
413                     {
414                         throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int) c));
415                     }
416                     result = (result << 4) + nybble;
417                 }
418                 return (char) result;
419             }
420 
421             /// <summary>
422             /// Consumes a text-only literal, throwing an exception if the read text doesn't match it.
423             /// It is assumed that the first letter of the literal has already been read.
424             /// </summary>
ConsumeLiteral(string text)425             private void ConsumeLiteral(string text)
426             {
427                 for (int i = 1; i < text.Length; i++)
428                 {
429                     char? next = reader.Read();
430                     if (next == null)
431                     {
432                         throw reader.CreateException("Unexpected end of text while reading literal token " + text);
433                     }
434                     if (next.Value != text[i])
435                     {
436                         throw reader.CreateException("Unexpected character while reading literal token " + text);
437                     }
438                 }
439             }
440 
ReadNumber(char initialCharacter)441             private double ReadNumber(char initialCharacter)
442             {
443                 StringBuilder builder = new StringBuilder();
444                 if (initialCharacter == '-')
445                 {
446                     builder.Append("-");
447                 }
448                 else
449                 {
450                     reader.PushBack(initialCharacter);
451                 }
452                 // Each method returns the character it read that doesn't belong in that part,
453                 // so we know what to do next, including pushing the character back at the end.
454                 // null is returned for "end of text".
455                 char? next = ReadInt(builder);
456                 if (next == '.')
457                 {
458                     next = ReadFrac(builder);
459                 }
460                 if (next == 'e' || next == 'E')
461                 {
462                     next = ReadExp(builder);
463                 }
464                 // If we read a character which wasn't part of the number, push it back so we can read it again
465                 // to parse the next token.
466                 if (next != null)
467                 {
468                     reader.PushBack(next.Value);
469                 }
470 
471                 // TODO: What exception should we throw if the value can't be represented as a double?
472                 try
473                 {
474                     double result = double.Parse(builder.ToString(),
475                         NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent,
476                         CultureInfo.InvariantCulture);
477 
478                     // .NET Core 3.0 and later returns infinity if the number is too large or small to be represented.
479                     // For compatibility with other Protobuf implementations the tokenizer should still throw.
480                     if (double.IsInfinity(result))
481                     {
482                         throw reader.CreateException("Numeric value out of range: " + builder);
483                     }
484 
485                     return result;
486                 }
487                 catch (OverflowException)
488                 {
489                     throw reader.CreateException("Numeric value out of range: " + builder);
490                 }
491             }
492 
ReadInt(StringBuilder builder)493             private char? ReadInt(StringBuilder builder)
494             {
495                 char first = reader.ReadOrFail("Invalid numeric literal");
496                 if (first < '0' || first > '9')
497                 {
498                     throw reader.CreateException("Invalid numeric literal");
499                 }
500                 builder.Append(first);
501                 int digitCount;
502                 char? next = ConsumeDigits(builder, out digitCount);
503                 if (first == '0' && digitCount != 0)
504                 {
505                     throw reader.CreateException("Invalid numeric literal: leading 0 for non-zero value.");
506                 }
507                 return next;
508             }
509 
ReadFrac(StringBuilder builder)510             private char? ReadFrac(StringBuilder builder)
511             {
512                 builder.Append('.'); // Already consumed this
513                 int digitCount;
514                 char? next = ConsumeDigits(builder, out digitCount);
515                 if (digitCount == 0)
516                 {
517                     throw reader.CreateException("Invalid numeric literal: fraction with no trailing digits");
518                 }
519                 return next;
520             }
521 
ReadExp(StringBuilder builder)522             private char? ReadExp(StringBuilder builder)
523             {
524                 builder.Append('E'); // Already consumed this (or 'e')
525                 char? next = reader.Read();
526                 if (next == null)
527                 {
528                     throw reader.CreateException("Invalid numeric literal: exponent with no trailing digits");
529                 }
530                 if (next == '-' || next == '+')
531                 {
532                     builder.Append(next.Value);
533                 }
534                 else
535                 {
536                     reader.PushBack(next.Value);
537                 }
538                 int digitCount;
539                 next = ConsumeDigits(builder, out digitCount);
540                 if (digitCount == 0)
541                 {
542                     throw reader.CreateException("Invalid numeric literal: exponent without value");
543                 }
544                 return next;
545             }
546 
ConsumeDigits(StringBuilder builder, out int count)547             private char? ConsumeDigits(StringBuilder builder, out int count)
548             {
549                 count = 0;
550                 while (true)
551                 {
552                     char? next = reader.Read();
553                     if (next == null || next.Value < '0' || next.Value > '9')
554                     {
555                         return next;
556                     }
557                     count++;
558                     builder.Append(next.Value);
559                 }
560             }
561 
562             /// <summary>
563             /// Validates that we're in a valid state to read a value (using the given error prefix if necessary)
564             /// and changes the state to the appropriate one, e.g. ObjectAfterColon to ObjectAfterProperty.
565             /// </summary>
ValidateAndModifyStateForValue(string errorPrefix)566             private void ValidateAndModifyStateForValue(string errorPrefix)
567             {
568                 ValidateState(ValueStates, errorPrefix);
569                 switch (state)
570                 {
571                     case State.StartOfDocument:
572                         state = State.ExpectedEndOfDocument;
573                         return;
574                     case State.ObjectAfterColon:
575                         state = State.ObjectAfterProperty;
576                         return;
577                     case State.ArrayStart:
578                     case State.ArrayAfterComma:
579                         state = State.ArrayAfterValue;
580                         return;
581                     default:
582                         throw new InvalidOperationException("ValidateAndModifyStateForValue does not handle all value states (and should)");
583                 }
584             }
585 
586             /// <summary>
587             /// Pops the top-most container, and sets the state to the appropriate one for the end of a value
588             /// in the parent container.
589             /// </summary>
PopContainer()590             private void PopContainer()
591             {
592                 containerStack.Pop();
593                 var parent = containerStack.Peek();
594                 switch (parent)
595                 {
596                     case ContainerType.Object:
597                         state = State.ObjectAfterProperty;
598                         break;
599                     case ContainerType.Array:
600                         state = State.ArrayAfterValue;
601                         break;
602                     case ContainerType.Document:
603                         state = State.ExpectedEndOfDocument;
604                         break;
605                     default:
606                         throw new InvalidOperationException("Unexpected container type: " + parent);
607                 }
608             }
609 
610             private enum ContainerType
611             {
612                 Document, Object, Array
613             }
614 
615             /// <summary>
616             /// Possible states of the tokenizer.
617             /// </summary>
618             /// <remarks>
619             /// <para>This is a flags enum purely so we can simply and efficiently represent a set of valid states
620             /// for checking.</para>
621             /// <para>
622             /// Each is documented with an example,
623             /// where ^ represents the current position within the text stream. The examples all use string values,
624             /// but could be any value, including nested objects/arrays.
625             /// The complete state of the tokenizer also includes a stack to indicate the contexts (arrays/objects).
626             /// Any additional notional state of "AfterValue" indicates that a value has been completed, at which
627             /// point there's an immediate transition to ExpectedEndOfDocument,  ObjectAfterProperty or ArrayAfterValue.
628             /// </para>
629             /// <para>
630             /// These states were derived manually by reading RFC 7159 carefully.
631             /// </para>
632             /// </remarks>
633             [Flags]
634             private enum State
635             {
636                 /// <summary>
637                 /// ^ { "foo": "bar" }
638                 /// Before the value in a document. Next states: ObjectStart, ArrayStart, "AfterValue"
639                 /// </summary>
640                 StartOfDocument = 1 << 0,
641                 /// <summary>
642                 /// { "foo": "bar" } ^
643                 /// After the value in a document. Next states: ReaderExhausted
644                 /// </summary>
645                 ExpectedEndOfDocument = 1 << 1,
646                 /// <summary>
647                 /// { "foo": "bar" } ^ (and already read to the end of the reader)
648                 /// Terminal state.
649                 /// </summary>
650                 ReaderExhausted = 1 << 2,
651                 /// <summary>
652                 /// { ^ "foo": "bar" }
653                 /// Before the *first* property in an object.
654                 /// Next states:
655                 /// "AfterValue" (empty object)
656                 /// ObjectBeforeColon (read a name)
657                 /// </summary>
658                 ObjectStart = 1 << 3,
659                 /// <summary>
660                 /// { "foo" ^ : "bar", "x": "y" }
661                 /// Next state: ObjectAfterColon
662                 /// </summary>
663                 ObjectBeforeColon = 1 << 4,
664                 /// <summary>
665                 /// { "foo" : ^ "bar", "x": "y" }
666                 /// Before any property other than the first in an object.
667                 /// (Equivalently: after any property in an object)
668                 /// Next states:
669                 /// "AfterValue" (value is simple)
670                 /// ObjectStart (value is object)
671                 /// ArrayStart (value is array)
672                 /// </summary>
673                 ObjectAfterColon = 1 << 5,
674                 /// <summary>
675                 /// { "foo" : "bar" ^ , "x" : "y" }
676                 /// At the end of a property, so expecting either a comma or end-of-object
677                 /// Next states: ObjectAfterComma or "AfterValue"
678                 /// </summary>
679                 ObjectAfterProperty = 1 << 6,
680                 /// <summary>
681                 /// { "foo":"bar", ^ "x":"y" }
682                 /// Read the comma after the previous property, so expecting another property.
683                 /// This is like ObjectStart, but closing brace isn't valid here
684                 /// Next state: ObjectBeforeColon.
685                 /// </summary>
686                 ObjectAfterComma = 1 << 7,
687                 /// <summary>
688                 /// [ ^ "foo", "bar" ]
689                 /// Before the *first* value in an array.
690                 /// Next states:
691                 /// "AfterValue" (read a value)
692                 /// "AfterValue" (end of array; will pop stack)
693                 /// </summary>
694                 ArrayStart = 1 << 8,
695                 /// <summary>
696                 /// [ "foo" ^ , "bar" ]
697                 /// After any value in an array, so expecting either a comma or end-of-array
698                 /// Next states: ArrayAfterComma or "AfterValue"
699                 /// </summary>
700                 ArrayAfterValue = 1 << 9,
701                 /// <summary>
702                 /// [ "foo", ^ "bar" ]
703                 /// After a comma in an array, so there *must* be another value (simple or complex).
704                 /// Next states: "AfterValue" (simple value), StartObject, StartArray
705                 /// </summary>
706                 ArrayAfterComma = 1 << 10
707             }
708 
709             /// <summary>
710             /// Wrapper around a text reader allowing small amounts of buffering and location handling.
711             /// </summary>
712             private class PushBackReader
713             {
714                 // TODO: Add locations for errors etc.
715 
716                 private readonly TextReader reader;
717 
PushBackReader(TextReader reader)718                 internal PushBackReader(TextReader reader)
719                 {
720                     // TODO: Wrap the reader in a BufferedReader?
721                     this.reader = reader;
722                 }
723 
724                 /// <summary>
725                 /// The buffered next character, if we have one.
726                 /// </summary>
727                 private char? nextChar;
728 
729                 /// <summary>
730                 /// Returns the next character in the stream, or null if we have reached the end.
731                 /// </summary>
732                 /// <returns></returns>
Read()733                 internal char? Read()
734                 {
735                     if (nextChar != null)
736                     {
737                         char? tmp = nextChar;
738                         nextChar = null;
739                         return tmp;
740                     }
741                     int next = reader.Read();
742                     return next == -1 ? null : (char?) next;
743                 }
744 
ReadOrFail(string messageOnFailure)745                 internal char ReadOrFail(string messageOnFailure)
746                 {
747                     char? next = Read();
748                     if (next == null)
749                     {
750                         throw CreateException(messageOnFailure);
751                     }
752                     return next.Value;
753                 }
754 
PushBack(char c)755                 internal void PushBack(char c)
756                 {
757                     if (nextChar != null)
758                     {
759                         throw new InvalidOperationException("Cannot push back when already buffering a character");
760                     }
761                     nextChar = c;
762                 }
763 
764                 /// <summary>
765                 /// Creates a new exception appropriate for the current state of the reader.
766                 /// </summary>
CreateException(string message)767                 internal InvalidJsonException CreateException(string message)
768                 {
769                     // TODO: Keep track of and use the location.
770                     return new InvalidJsonException(message);
771                 }
772             }
773         }
774     }
775 }
776