1 /*
2  * Copyright (c) 2009-2021, Google LLC
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *     * Redistributions of source code must retain the above copyright
8  *       notice, this list of conditions and the following disclaimer.
9  *     * Redistributions in binary form must reproduce the above copyright
10  *       notice, this list of conditions and the following disclaimer in the
11  *       documentation and/or other materials provided with the distribution.
12  *     * Neither the name of Google LLC nor the
13  *       names of its contributors may be used to endorse or promote products
14  *       derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
20  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "upb/json/decode.h"
29 
30 #include <errno.h>
31 #include <float.h>
32 #include <inttypes.h>
33 #include <limits.h>
34 #include <math.h>
35 #include <stdlib.h>
36 #include <string.h>
37 
38 #include "upb/collections/map.h"
39 #include "upb/lex/atoi.h"
40 #include "upb/lex/unicode.h"
41 #include "upb/reflection/message.h"
42 #include "upb/wire/encode.h"
43 
44 // Must be last.
45 #include "upb/port/def.inc"
46 
47 typedef struct {
48   const char *ptr, *end;
49   upb_Arena* arena; /* TODO: should we have a tmp arena for tmp data? */
50   const upb_DefPool* symtab;
51   int depth;
52   upb_Status* status;
53   jmp_buf err;
54   int line;
55   const char* line_begin;
56   bool is_first;
57   int options;
58   const upb_FieldDef* debug_field;
59 } jsondec;
60 
/* Token kinds returned by jsondec_rawpeek() and jsondec_peek(). */
enum {
  JD_OBJECT,
  JD_ARRAY,
  JD_STRING,
  JD_NUMBER,
  JD_TRUE,
  JD_FALSE,
  JD_NULL
};
62 
/* Forward declarations of mutually-recursive functions. */

/* Called by jsondec_tomsg() for message types that are well-known types. */
static void jsondec_wellknown(jsondec* d, upb_Message* msg,
                              const upb_MessageDef* m);
/* Called for array elements, map keys/values and singular non-message
 * fields. */
static upb_MessageValue jsondec_value(jsondec* d, const upb_FieldDef* f);
/* NOTE(review): presumably handles google.protobuf.Value; the definition is
 * further down the file -- confirm there. */
static void jsondec_wellknownvalue(jsondec* d, upb_Message* msg,
                                   const upb_MessageDef* m);
/* Called by jsondec_tomsg() for ordinary (non-well-known) message types. */
static void jsondec_object(jsondec* d, upb_Message* msg,
                           const upb_MessageDef* m);
71 
/* Returns true when the bytes of `str` are exactly the C string `lit`
 * (same length, same contents). */
static bool jsondec_streql(upb_StringView str, const char* lit) {
  if (str.size != strlen(lit)) return false;
  return memcmp(str.data, lit, str.size) == 0;
}
75 
jsondec_isnullvalue(const upb_FieldDef * f)76 static bool jsondec_isnullvalue(const upb_FieldDef* f) {
77   return upb_FieldDef_CType(f) == kUpb_CType_Enum &&
78          strcmp(upb_EnumDef_FullName(upb_FieldDef_EnumSubDef(f)),
79                 "google.protobuf.NullValue") == 0;
80 }
81 
jsondec_isvalue(const upb_FieldDef * f)82 static bool jsondec_isvalue(const upb_FieldDef* f) {
83   return (upb_FieldDef_CType(f) == kUpb_CType_Message &&
84           upb_MessageDef_WellKnownType(upb_FieldDef_MessageSubDef(f)) ==
85               kUpb_WellKnown_Value) ||
86          jsondec_isnullvalue(f);
87 }
88 
jsondec_err(jsondec * d,const char * msg)89 UPB_NORETURN static void jsondec_err(jsondec* d, const char* msg) {
90   upb_Status_SetErrorFormat(d->status, "Error parsing JSON @%d:%d: %s", d->line,
91                             (int)(d->ptr - d->line_begin), msg);
92   UPB_LONGJMP(d->err, 1);
93 }
94 
95 UPB_PRINTF(2, 3)
jsondec_errf(jsondec * d,const char * fmt,...)96 UPB_NORETURN static void jsondec_errf(jsondec* d, const char* fmt, ...) {
97   va_list argp;
98   upb_Status_SetErrorFormat(d->status, "Error parsing JSON @%d:%d: ", d->line,
99                             (int)(d->ptr - d->line_begin));
100   va_start(argp, fmt);
101   upb_Status_VAppendErrorFormat(d->status, fmt, argp);
102   va_end(argp);
103   UPB_LONGJMP(d->err, 1);
104 }
105 
/* Advances d->ptr past spaces, tabs, carriage returns and newlines, updating
 * the line bookkeeping at each newline. Returns at the first other byte;
 * reaching the end of input is reported as an error. */
static void jsondec_skipws(jsondec* d) {
  for (; d->ptr != d->end; d->ptr++) {
    const char ch = *d->ptr;

    if (ch == '\n') {
      d->line++;
      d->line_begin = d->ptr;
    } else if (ch != '\r' && ch != '\t' && ch != ' ') {
      return;
    }
  }
  jsondec_err(d, "Unexpected EOF");
}
124 
/* Consumes `ch` if it is the next input byte. Returns whether it was
 * consumed; never raises an error. */
static bool jsondec_tryparsech(jsondec* d, char ch) {
  const bool matched = d->ptr != d->end && *d->ptr == ch;
  if (matched) d->ptr++;
  return matched;
}
130 
/* Consumes the exact byte sequence `lit` from the input, or raises an error
 * naming the expected literal. */
static void jsondec_parselit(jsondec* d, const char* lit) {
  const size_t lit_len = strlen(lit);
  const size_t remaining = d->end - d->ptr;
  const bool matches =
      remaining >= lit_len && memcmp(d->ptr, lit, lit_len) == 0;

  if (!matches) {
    jsondec_errf(d, "Expected: '%s'", lit);
  }
  d->ptr += lit_len;
}
139 
/* Skips whitespace and then requires the single character `ch`. */
static void jsondec_wsch(jsondec* d, char ch) {
  jsondec_skipws(d);
  if (jsondec_tryparsech(d, ch)) return;
  jsondec_errf(d, "Expected: '%c'", ch);
}
146 
/* Consumes the literal `true` at the current position, or raises an error. */
static void jsondec_true(jsondec* d) {
  jsondec_parselit(d, "true");
}
/* Consumes the literal `false` at the current position, or raises an error. */
static void jsondec_false(jsondec* d) {
  jsondec_parselit(d, "false");
}
/* Consumes the literal `null` at the current position, or raises an error. */
static void jsondec_null(jsondec* d) {
  jsondec_parselit(d, "null");
}
150 
/* Skips whitespace and requires the ':' that separates an object key from
 * its value. */
static void jsondec_entrysep(jsondec* d) {
  /* Same skip-then-match sequence and same "Expected: ':'" message as
   * matching the one-character literal ":". */
  jsondec_wsch(d, ':');
}
155 
/* Classifies the JSON value that starts at d->ptr by looking at its first
 * byte only. Does not skip whitespace and does not consume anything; the
 * byte at d->ptr is read without an end-of-input check. Raises an error for
 * a byte that cannot start a JSON value. */
static int jsondec_rawpeek(jsondec* d) {
  const char ch = *d->ptr;

  if (ch == '{') return JD_OBJECT;
  if (ch == '[') return JD_ARRAY;
  if (ch == '"') return JD_STRING;
  if (ch == '-' || (ch >= '0' && ch <= '9')) return JD_NUMBER;
  if (ch == 't') return JD_TRUE;
  if (ch == 'f') return JD_FALSE;
  if (ch == 'n') return JD_NULL;

  jsondec_errf(d, "Unexpected character: '%c'", ch);
}
186 
187 /* JSON object/array **********************************************************/
188 
189 /* These are used like so:
190  *
191  * jsondec_objstart(d);
192  * while (jsondec_objnext(d)) {
193  *   ...
194  * }
195  * jsondec_objend(d) */
196 
/* Skips leading whitespace, then reports the kind of the upcoming JSON value
 * (one of the JD_* constants) without consuming it. */
static int jsondec_peek(jsondec* d) {
  int kind;

  jsondec_skipws(d);
  kind = jsondec_rawpeek(d);
  return kind;
}
201 
/* Enters one nesting level: spends one unit of the depth budget (raising an
 * error once it goes negative) and marks the new sequence as not yet having
 * produced an element. */
static void jsondec_push(jsondec* d) {
  d->depth -= 1;
  if (d->depth < 0) {
    jsondec_err(d, "Recursion limit exceeded");
  }
  d->is_first = true;
}
208 
/* Shared iteration step for arrays and objects. Returns false when the next
 * non-whitespace byte is `end_ch` (which is left unconsumed). Otherwise
 * returns true, first requiring a ',' unless this is the first element. */
static bool jsondec_seqnext(jsondec* d, char end_ch) {
  const bool was_first = d->is_first;
  d->is_first = false;

  jsondec_skipws(d);
  if (*d->ptr == end_ch) return false;
  if (was_first) return true;

  jsondec_parselit(d, ",");
  return true;
}
217 
/* Begins an array: enters a nesting level, then requires '[' (after optional
 * whitespace). */
static void jsondec_arrstart(jsondec* d) {
  jsondec_push(d);
  jsondec_wsch(d, '[');
}
222 
/* Ends an array: gives back one unit of the depth budget, then requires ']'
 * (after optional whitespace). */
static void jsondec_arrend(jsondec* d) {
  d->depth += 1;
  jsondec_wsch(d, ']');
}
227 
/* Returns true while the current array has another element to decode. */
static bool jsondec_arrnext(jsondec* d) {
  return jsondec_seqnext(d, ']');
}
229 
/* Begins an object: enters a nesting level, then requires '{' (after
 * optional whitespace). */
static void jsondec_objstart(jsondec* d) {
  jsondec_push(d);
  jsondec_wsch(d, '{');
}
234 
/* Ends an object: gives back one unit of the depth budget, then requires '}'
 * (after optional whitespace). */
static void jsondec_objend(jsondec* d) {
  d->depth += 1;
  jsondec_wsch(d, '}');
}
239 
/* Returns true while the current object has another member to decode. When
 * it does, the upcoming token must be a string (the member key); anything
 * else is an error. */
static bool jsondec_objnext(jsondec* d) {
  const bool has_member = jsondec_seqnext(d, '}');

  if (has_member && jsondec_peek(d) != JD_STRING) {
    jsondec_err(d, "Object must start with string");
  }
  return has_member;
}
247 
248 /* JSON number ****************************************************************/
249 
/* Advances past a run of ASCII digits. Returns true if at least one digit
 * was consumed. */
static bool jsondec_tryskipdigits(jsondec* d) {
  const char* const first = d->ptr;

  while (d->ptr < d->end && *d->ptr >= '0' && *d->ptr <= '9') {
    d->ptr++;
  }

  return d->ptr != first;
}
262 
/* Like jsondec_tryskipdigits(), but an empty digit run is an error. */
static void jsondec_skipdigits(jsondec* d) {
  if (jsondec_tryskipdigits(d)) return;
  jsondec_err(d, "Expected one or more digits");
}
268 
/* Parses a JSON number starting at d->ptr and returns it as a double.
 *
 * The JSON grammar is validated by hand first (optional '-', integer part
 * without a leading zero, optional fraction, optional exponent); the span is
 * then converted with strtod(), which accepts a superset of that grammar.
 * Values outside [-DBL_MAX, DBL_MAX] are rejected. */
static double jsondec_number(jsondec* d) {
  const char* const num_begin = d->ptr;
  char* parsed_end;
  double result;

  assert(jsondec_rawpeek(d) == JD_NUMBER);

  /* Optional sign. */
  if (*d->ptr == '-') d->ptr++;

  /* Integer part: either a lone '0' or a non-empty digit run. */
  if (!jsondec_tryparsech(d, '0')) {
    jsondec_skipdigits(d);
  } else if (jsondec_tryskipdigits(d)) {
    jsondec_err(d, "number cannot have leading zero");
  }

  if (d->ptr != d->end) {
    /* Optional fraction. */
    if (jsondec_tryparsech(d, '.')) {
      jsondec_skipdigits(d);
    }

    /* Optional exponent. */
    if (d->ptr != d->end && (*d->ptr == 'e' || *d->ptr == 'E')) {
      d->ptr++;
      if (d->ptr == d->end) {
        jsondec_err(d, "Unexpected EOF in number");
      }
      if (*d->ptr == '+' || *d->ptr == '-') {
        d->ptr++;
      }
      jsondec_skipdigits(d);
    }
  }

  /* The syntax is now known to be valid JSON; let strtod() do the actual
   * conversion. */
  errno = 0;
  result = strtod(num_begin, &parsed_end);
  assert(parsed_end == d->ptr);

  /* errno == ERANGE is deliberately not checked here: the min/max-val
   * conformance tests currently fail if it is. Whether the tests or
   * strtod() are at fault still needs investigation. */

  if (result > DBL_MAX || result < -DBL_MAX) {
    jsondec_err(d, "Number out of range");
  }

  return result;
}
327 
328 /* JSON string ****************************************************************/
329 
/* Consumes the character that follows a backslash in a JSON string and
 * returns the byte it stands for. Handles the single-character escapes only;
 * any other character is an error. */
static char jsondec_escape(jsondec* d) {
  const char code = *d->ptr++;

  /* These three escape to themselves. */
  if (code == '"' || code == '\\' || code == '/') return code;

  if (code == 'b') return '\b';
  if (code == 'f') return '\f';
  if (code == 'n') return '\n';
  if (code == 'r') return '\r';
  if (code == 't') return '\t';

  jsondec_err(d, "Invalid escape char");
}
352 
/* Consumes exactly four hexadecimal digits (either case) and returns the
 * 16-bit value they encode. Raises an error if fewer than four bytes remain
 * or if a byte is not a hex digit. */
static uint32_t jsondec_codepoint(jsondec* d) {
  uint32_t cp = 0;

  if (d->end - d->ptr < 4) {
    jsondec_err(d, "EOF inside string");
  }

  for (int i = 0; i < 4; i++) {
    const char ch = *d->ptr++;
    uint32_t nibble = 0;

    if (ch >= '0' && ch <= '9') {
      nibble = ch - '0';
    } else if (ch >= 'a' && ch <= 'f') {
      nibble = ch - 'a' + 10;
    } else if (ch >= 'A' && ch <= 'F') {
      nibble = ch - 'A' + 10;
    } else {
      jsondec_err(d, "Invalid hex digit");
    }

    cp = (cp << 4) | nibble;
  }

  return cp;
}
378 
379 /* Parses a \uXXXX unicode escape (possibly a surrogate pair). */
jsondec_unicode(jsondec * d,char * out)380 static size_t jsondec_unicode(jsondec* d, char* out) {
381   uint32_t cp = jsondec_codepoint(d);
382   if (upb_Unicode_IsHigh(cp)) {
383     /* Surrogate pair: two 16-bit codepoints become a 32-bit codepoint. */
384     jsondec_parselit(d, "\\u");
385     uint32_t low = jsondec_codepoint(d);
386     if (!upb_Unicode_IsLow(low)) jsondec_err(d, "Invalid low surrogate");
387     cp = upb_Unicode_FromPair(cp, low);
388   } else if (upb_Unicode_IsLow(cp)) {
389     jsondec_err(d, "Unpaired low surrogate");
390   }
391 
392   /* Write to UTF-8 */
393   int bytes = upb_Unicode_ToUTF8(cp, out);
394   if (bytes == 0) jsondec_err(d, "Invalid codepoint");
395   return bytes;
396 }
397 
/* Grows the string buffer described by (*buf, *end, *buf_end): capacity is
 * doubled, with a minimum of 8 bytes. The bytes written so far are kept and
 * all three pointers are updated to refer to the new storage. Raises an
 * error if the arena cannot satisfy the request. */
static void jsondec_resize(jsondec* d, char** buf, char** end, char** buf_end) {
  const size_t used = *end - *buf;
  const size_t old_cap = *buf_end - *buf;
  const size_t new_cap = UPB_MAX(8, 2 * old_cap);

  *buf = upb_Arena_Realloc(d->arena, *buf, used, new_cap);
  if (*buf == NULL) {
    jsondec_err(d, "Out of memory");
  }

  *end = *buf + used;
  *buf_end = *buf + new_cap;
}
409 
jsondec_string(jsondec * d)410 static upb_StringView jsondec_string(jsondec* d) {
411   char* buf = NULL;
412   char* end = NULL;
413   char* buf_end = NULL;
414 
415   jsondec_skipws(d);
416 
417   if (*d->ptr++ != '"') {
418     jsondec_err(d, "Expected string");
419   }
420 
421   while (d->ptr < d->end) {
422     char ch = *d->ptr++;
423 
424     if (end == buf_end) {
425       jsondec_resize(d, &buf, &end, &buf_end);
426     }
427 
428     switch (ch) {
429       case '"': {
430         upb_StringView ret;
431         ret.data = buf;
432         ret.size = end - buf;
433         *end = '\0'; /* Needed for possible strtod(). */
434         return ret;
435       }
436       case '\\':
437         if (d->ptr == d->end) goto eof;
438         if (*d->ptr == 'u') {
439           d->ptr++;
440           if (buf_end - end < 4) {
441             /* Allow space for maximum-sized codepoint (4 bytes). */
442             jsondec_resize(d, &buf, &end, &buf_end);
443           }
444           end += jsondec_unicode(d, end);
445         } else {
446           *end++ = jsondec_escape(d);
447         }
448         break;
449       default:
450         if ((unsigned char)*d->ptr < 0x20) {
451           jsondec_err(d, "Invalid char in JSON string");
452         }
453         *end++ = ch;
454         break;
455     }
456   }
457 
458 eof:
459   jsondec_err(d, "EOF inside string");
460 }
461 
/* Parses and discards one complete JSON value of any kind, recursing into
 * objects and arrays. */
static void jsondec_skipval(jsondec* d) {
  const int kind = jsondec_peek(d);

  if (kind == JD_OBJECT) {
    jsondec_objstart(d);
    while (jsondec_objnext(d)) {
      jsondec_string(d); /* Member key. */
      jsondec_entrysep(d);
      jsondec_skipval(d); /* Member value. */
    }
    jsondec_objend(d);
  } else if (kind == JD_ARRAY) {
    jsondec_arrstart(d);
    while (jsondec_arrnext(d)) {
      jsondec_skipval(d);
    }
    jsondec_arrend(d);
  } else if (kind == JD_TRUE) {
    jsondec_true(d);
  } else if (kind == JD_FALSE) {
    jsondec_false(d);
  } else if (kind == JD_NULL) {
    jsondec_null(d);
  } else if (kind == JD_STRING) {
    jsondec_string(d);
  } else if (kind == JD_NUMBER) {
    jsondec_number(d);
  }
}
497 
498 /* Base64 decoding for bytes fields. ******************************************/
499 
/* Maps one base64 character to its 6-bit value.
 *
 * Both the standard alphabet ('+', '/') and the URL-safe alphabet ('-', '_')
 * are accepted. Every other byte, including the '=' padding character, maps
 * to -1, which becomes an all-ones unsigned value on return, so callers can
 * OR several shifted lookups together and detect a bad character by the
 * sign of the combined result. */
static unsigned int jsondec_base64_tablelookup(const char ch) {
  /* Indexed by byte value, 16 entries per row. `static` so the table is not
   * rebuilt on the stack on every call. */
  static const signed char table[256] = {
      /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0x20: '+' '-' '/' */
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63,
      /* 0x30: '0'..'9' */
      52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
      /* 0x40: 'A'..'O' */
      -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
      /* 0x50: 'P'..'Z', '_' */
      15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63,
      /* 0x60: 'a'..'o' */
      -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
      /* 0x70: 'p'..'z' */
      41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
      /* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xA0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xB0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xC0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xD0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xE0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xF0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};

  /* Index through unsigned char: where plain char is signed, a byte >= 0x80
   * is negative, and converting it straight to unsigned would produce a huge
   * index far outside the table. */
  /* Sign-extend return value so high bit will be set on any unexpected char. */
  return table[(unsigned char)ch];
}
544 
/* Decodes a trailing base64 group of two or three characters in [ptr, end)
 * into one or two bytes at `out`. Any other group length, or any character
 * outside the base64 alphabets, is reported as corrupt input. Returns the
 * position just past the last byte written. */
static char* jsondec_partialbase64(jsondec* d, const char* ptr, const char* end,
                                   char* out) {
  int32_t bits = -1; /* Stays negative unless a supported length is seen. */

  if (end - ptr == 2) {
    bits = jsondec_base64_tablelookup(ptr[0]) << 18 |
           jsondec_base64_tablelookup(ptr[1]) << 12;
    *out++ = bits >> 16;
  } else if (end - ptr == 3) {
    bits = jsondec_base64_tablelookup(ptr[0]) << 18 |
           jsondec_base64_tablelookup(ptr[1]) << 12 |
           jsondec_base64_tablelookup(ptr[2]) << 6;
    *out++ = bits >> 16;
    *out++ = (bits >> 8) & 0xff;
  }

  /* A failed lookup sets the high bit, making the combined value negative. */
  if (bits < 0) {
    jsondec_err(d, "Corrupt base64");
  }

  return out;
}
572 
/* Decodes the base64 text in `str` in place and returns the number of
 * decoded bytes.
 *
 * Decoding in place is safe because the buffer is freshly allocated (it does
 * not alias the input) and because every four input bytes yield at most
 * three output bytes. Trailing '=' padding is accepted but not required. */
static size_t jsondec_base64(jsondec* d, upb_StringView str) {
  char* dst = (char*)str.data;
  const char* src = str.data;
  const char* src_end = src + str.size;
  const char* quads_end = src + (str.size & -4); /* Whole 4-char groups. */

  while (src < quads_end) {
    int quad = jsondec_base64_tablelookup(src[0]) << 18 |
               jsondec_base64_tablelookup(src[1]) << 12 |
               jsondec_base64_tablelookup(src[2]) << 6 |
               jsondec_base64_tablelookup(src[3]) << 0;

    if (quad < 0) {
      /* Junk or padding. If this is the final group and it ends in '=',
       * drop the padding so the tail decoder sees only real characters. */
      const bool padded_tail = (src_end - src == 4) && src[3] == '=';
      if (padded_tail) {
        src_end -= (src[2] == '=') ? 2 : 1;
      }
      break;
    }

    dst[0] = quad >> 16;
    dst[1] = (quad >> 8) & 0xff;
    dst[2] = quad & 0xff;

    src += 4;
    dst += 3;
  }

  if (src < src_end) {
    /* Leftover characters: an unpadded or de-padded final group. */
    dst = jsondec_partialbase64(d, src, src_end, dst);
  }

  return dst - str.data;
}
611 
612 /* Low-level integer parsing **************************************************/
613 
/* Wraps upb_BufToUint64(): a NULL result from it is reported as an integer
 * overflow error; otherwise its return value is passed through. */
static const char* jsondec_buftouint64(jsondec* d, const char* ptr,
                                       const char* end, uint64_t* val) {
  const char* stop = upb_BufToUint64(ptr, end, val);
  if (stop == NULL) {
    jsondec_err(d, "Integer overflow");
  }
  return stop;
}
620 
/* Wraps upb_BufToInt64(): a NULL result from it is reported as an integer
 * overflow error; otherwise its return value is passed through. */
static const char* jsondec_buftoint64(jsondec* d, const char* ptr,
                                      const char* end, int64_t* val,
                                      bool* is_neg) {
  const char* stop = upb_BufToInt64(ptr, end, val, is_neg);
  if (stop == NULL) {
    jsondec_err(d, "Integer overflow");
  }
  return stop;
}
628 
/* Converts the whole of `str` to a uint64_t. It is an error if the
 * conversion stops before the end of the string. */
static uint64_t jsondec_strtouint64(jsondec* d, upb_StringView str) {
  const char* const str_end = str.data + str.size;
  uint64_t result;
  const char* stop = jsondec_buftouint64(d, str.data, str_end, &result);

  if (stop != str_end) {
    jsondec_err(d, "Non-number characters in quoted integer");
  }
  return result;
}
637 
/* Converts the whole of `str` to an int64_t. It is an error if the
 * conversion stops before the end of the string. */
static int64_t jsondec_strtoint64(jsondec* d, upb_StringView str) {
  const char* const str_end = str.data + str.size;
  int64_t result;
  const char* stop = jsondec_buftoint64(d, str.data, str_end, &result, NULL);

  if (stop != str_end) {
    jsondec_err(d, "Non-number characters in quoted integer");
  }
  return result;
}
646 
647 /* Primitive value types ******************************************************/
648 
649 /* Parse INT32 or INT64 value. */
jsondec_int(jsondec * d,const upb_FieldDef * f)650 static upb_MessageValue jsondec_int(jsondec* d, const upb_FieldDef* f) {
651   upb_MessageValue val;
652 
653   switch (jsondec_peek(d)) {
654     case JD_NUMBER: {
655       double dbl = jsondec_number(d);
656       if (dbl > 9223372036854774784.0 || dbl < -9223372036854775808.0) {
657         jsondec_err(d, "JSON number is out of range.");
658       }
659       val.int64_val = dbl; /* must be guarded, overflow here is UB */
660       if (val.int64_val != dbl) {
661         jsondec_errf(d, "JSON number was not integral (%f != %" PRId64 ")", dbl,
662                      val.int64_val);
663       }
664       break;
665     }
666     case JD_STRING: {
667       upb_StringView str = jsondec_string(d);
668       val.int64_val = jsondec_strtoint64(d, str);
669       break;
670     }
671     default:
672       jsondec_err(d, "Expected number or string");
673   }
674 
675   if (upb_FieldDef_CType(f) == kUpb_CType_Int32 ||
676       upb_FieldDef_CType(f) == kUpb_CType_Enum) {
677     if (val.int64_val > INT32_MAX || val.int64_val < INT32_MIN) {
678       jsondec_err(d, "Integer out of range.");
679     }
680     val.int32_val = (int32_t)val.int64_val;
681   }
682 
683   return val;
684 }
685 
686 /* Parse UINT32 or UINT64 value. */
jsondec_uint(jsondec * d,const upb_FieldDef * f)687 static upb_MessageValue jsondec_uint(jsondec* d, const upb_FieldDef* f) {
688   upb_MessageValue val = {0};
689 
690   switch (jsondec_peek(d)) {
691     case JD_NUMBER: {
692       double dbl = jsondec_number(d);
693       if (dbl > 18446744073709549568.0 || dbl < 0) {
694         jsondec_err(d, "JSON number is out of range.");
695       }
696       val.uint64_val = dbl; /* must be guarded, overflow here is UB */
697       if (val.uint64_val != dbl) {
698         jsondec_errf(d, "JSON number was not integral (%f != %" PRIu64 ")", dbl,
699                      val.uint64_val);
700       }
701       break;
702     }
703     case JD_STRING: {
704       upb_StringView str = jsondec_string(d);
705       val.uint64_val = jsondec_strtouint64(d, str);
706       break;
707     }
708     default:
709       jsondec_err(d, "Expected number or string");
710   }
711 
712   if (upb_FieldDef_CType(f) == kUpb_CType_UInt32) {
713     if (val.uint64_val > UINT32_MAX) {
714       jsondec_err(d, "Integer out of range.");
715     }
716     val.uint32_val = (uint32_t)val.uint64_val;
717   }
718 
719   return val;
720 }
721 
722 /* Parse DOUBLE or FLOAT value. */
jsondec_double(jsondec * d,const upb_FieldDef * f)723 static upb_MessageValue jsondec_double(jsondec* d, const upb_FieldDef* f) {
724   upb_StringView str;
725   upb_MessageValue val = {0};
726 
727   switch (jsondec_peek(d)) {
728     case JD_NUMBER:
729       val.double_val = jsondec_number(d);
730       break;
731     case JD_STRING:
732       str = jsondec_string(d);
733       if (jsondec_streql(str, "NaN")) {
734         val.double_val = NAN;
735       } else if (jsondec_streql(str, "Infinity")) {
736         val.double_val = INFINITY;
737       } else if (jsondec_streql(str, "-Infinity")) {
738         val.double_val = -INFINITY;
739       } else {
740         val.double_val = strtod(str.data, NULL);
741       }
742       break;
743     default:
744       jsondec_err(d, "Expected number or string");
745   }
746 
747   if (upb_FieldDef_CType(f) == kUpb_CType_Float) {
748     float f = val.double_val;
749     if (val.double_val != INFINITY && val.double_val != -INFINITY) {
750       if (f == INFINITY || f == -INFINITY) jsondec_err(d, "Float out of range");
751     }
752     val.float_val = f;
753   }
754 
755   return val;
756 }
757 
758 /* Parse STRING or BYTES value. */
jsondec_strfield(jsondec * d,const upb_FieldDef * f)759 static upb_MessageValue jsondec_strfield(jsondec* d, const upb_FieldDef* f) {
760   upb_MessageValue val;
761   val.str_val = jsondec_string(d);
762   if (upb_FieldDef_CType(f) == kUpb_CType_Bytes) {
763     val.str_val.size = jsondec_base64(d, val.str_val);
764   }
765   return val;
766 }
767 
jsondec_enum(jsondec * d,const upb_FieldDef * f)768 static upb_MessageValue jsondec_enum(jsondec* d, const upb_FieldDef* f) {
769   switch (jsondec_peek(d)) {
770     case JD_STRING: {
771       upb_StringView str = jsondec_string(d);
772       const upb_EnumDef* e = upb_FieldDef_EnumSubDef(f);
773       const upb_EnumValueDef* ev =
774           upb_EnumDef_FindValueByNameWithSize(e, str.data, str.size);
775       upb_MessageValue val;
776       if (ev) {
777         val.int32_val = upb_EnumValueDef_Number(ev);
778       } else {
779         if (d->options & upb_JsonDecode_IgnoreUnknown) {
780           val.int32_val = 0;
781         } else {
782           jsondec_errf(d, "Unknown enumerator: '" UPB_STRINGVIEW_FORMAT "'",
783                        UPB_STRINGVIEW_ARGS(str));
784         }
785       }
786       return val;
787     }
788     case JD_NULL: {
789       if (jsondec_isnullvalue(f)) {
790         upb_MessageValue val;
791         jsondec_null(d);
792         val.int32_val = 0;
793         return val;
794       }
795     }
796       /* Fallthrough. */
797     default:
798       return jsondec_int(d, f);
799   }
800 }
801 
jsondec_bool(jsondec * d,const upb_FieldDef * f)802 static upb_MessageValue jsondec_bool(jsondec* d, const upb_FieldDef* f) {
803   bool is_map_key = upb_FieldDef_Number(f) == 1 &&
804                     upb_MessageDef_IsMapEntry(upb_FieldDef_ContainingType(f));
805   upb_MessageValue val;
806 
807   if (is_map_key) {
808     upb_StringView str = jsondec_string(d);
809     if (jsondec_streql(str, "true")) {
810       val.bool_val = true;
811     } else if (jsondec_streql(str, "false")) {
812       val.bool_val = false;
813     } else {
814       jsondec_err(d, "Invalid boolean map key");
815     }
816   } else {
817     switch (jsondec_peek(d)) {
818       case JD_TRUE:
819         val.bool_val = true;
820         jsondec_true(d);
821         break;
822       case JD_FALSE:
823         val.bool_val = false;
824         jsondec_false(d);
825         break;
826       default:
827         jsondec_err(d, "Expected true or false");
828     }
829   }
830 
831   return val;
832 }
833 
834 /* Composite types (array/message/map) ****************************************/
835 
/* Decodes a JSON array into the repeated field `f` of `msg`, appending one
 * element per array item. An allocation failure while obtaining the array
 * or appending to it is reported as "Out of memory". */
static void jsondec_array(jsondec* d, upb_Message* msg, const upb_FieldDef* f) {
  upb_Array* arr = upb_Message_Mutable(msg, f, d->arena).array;
  if (!arr) jsondec_err(d, "Out of memory");

  jsondec_arrstart(d);
  while (jsondec_arrnext(d)) {
    upb_MessageValue elem = jsondec_value(d, f);
    /* upb_Array_Append() reports failure through its return value; a
     * dropped element must not go unnoticed. */
    if (!upb_Array_Append(arr, elem, d->arena)) {
      jsondec_err(d, "Out of memory");
    }
  }
  jsondec_arrend(d);
}
846 
/* Decodes a JSON object into the map field `f` of `msg`. Each member key is
 * decoded with the map entry's field 1 and each member value with its
 * field 2. An allocation failure while obtaining the map or inserting into
 * it is reported as "Out of memory". */
static void jsondec_map(jsondec* d, upb_Message* msg, const upb_FieldDef* f) {
  upb_Map* map = upb_Message_Mutable(msg, f, d->arena).map;
  const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f);
  const upb_FieldDef* key_f = upb_MessageDef_FindFieldByNumber(entry, 1);
  const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(entry, 2);

  if (!map) jsondec_err(d, "Out of memory");

  jsondec_objstart(d);
  while (jsondec_objnext(d)) {
    upb_MessageValue key, val;
    key = jsondec_value(d, key_f);
    jsondec_entrysep(d);
    val = jsondec_value(d, val_f);
    /* upb_Map_Set() reports failure through its return value; a dropped
     * entry must not go unnoticed. */
    if (!upb_Map_Set(map, key, val, d->arena)) {
      jsondec_err(d, "Out of memory");
    }
  }
  jsondec_objend(d);
}
863 
/* Decodes the upcoming JSON value into `msg`: well-known message types go
 * through jsondec_wellknown(), everything else through jsondec_object(). */
static void jsondec_tomsg(jsondec* d, upb_Message* msg,
                          const upb_MessageDef* m) {
  if (upb_MessageDef_WellKnownType(m) != kUpb_WellKnown_Unspecified) {
    jsondec_wellknown(d, msg, m);
    return;
  }
  jsondec_object(d, msg, m);
}
872 
jsondec_msg(jsondec * d,const upb_FieldDef * f)873 static upb_MessageValue jsondec_msg(jsondec* d, const upb_FieldDef* f) {
874   const upb_MessageDef* m = upb_FieldDef_MessageSubDef(f);
875   const upb_MiniTable* layout = upb_MessageDef_MiniTable(m);
876   upb_Message* msg = upb_Message_New(layout, d->arena);
877   upb_MessageValue val;
878 
879   jsondec_tomsg(d, msg, m);
880   val.msg_val = msg;
881   return val;
882 }
883 
/* Decodes one `"name": value` member of a JSON object into `msg`.
 *
 * - A name of the form "[full.name]" is looked up as an extension in
 *   d->symtab and must extend `m`; any other name is looked up by JSON name
 *   in `m`.
 * - An unknown name is an error unless upb_JsonDecode_IgnoreUnknown is set,
 *   in which case the value is skipped.
 * - A JSON null leaves the field unset, except for Value / NullValue fields.
 * - Setting a second member of the same oneof is an error.
 * - Allocation failures while storing the value are reported as
 *   "Out of memory". */
static void jsondec_field(jsondec* d, upb_Message* msg,
                          const upb_MessageDef* m) {
  upb_StringView name;
  const upb_FieldDef* f;
  const upb_FieldDef* preserved;

  name = jsondec_string(d);
  jsondec_entrysep(d);

  if (name.size >= 2 && name.data[0] == '[' &&
      name.data[name.size - 1] == ']') {
    /* Strip the surrounding brackets before the extension lookup. */
    f = upb_DefPool_FindExtensionByNameWithSize(d->symtab, name.data + 1,
                                                name.size - 2);
    if (f && upb_FieldDef_ContainingType(f) != m) {
      jsondec_errf(
          d, "Extension %s extends message %s, but was seen in message %s",
          upb_FieldDef_FullName(f),
          upb_MessageDef_FullName(upb_FieldDef_ContainingType(f)),
          upb_MessageDef_FullName(m));
    }
  } else {
    f = upb_MessageDef_FindByJsonNameWithSize(m, name.data, name.size);
  }

  if (!f) {
    if ((d->options & upb_JsonDecode_IgnoreUnknown) == 0) {
      jsondec_errf(d, "No such field: " UPB_STRINGVIEW_FORMAT,
                   UPB_STRINGVIEW_ARGS(name));
    }
    jsondec_skipval(d);
    return;
  }

  if (jsondec_peek(d) == JD_NULL && !jsondec_isvalue(f)) {
    /* JSON "null" indicates a default value, so no need to set anything. */
    jsondec_null(d);
    return;
  }

  if (upb_FieldDef_RealContainingOneof(f) &&
      upb_Message_WhichOneof(msg, upb_FieldDef_ContainingOneof(f))) {
    jsondec_err(d, "More than one field for this oneof.");
  }

  /* Track the field being decoded; restored once this member is done. */
  preserved = d->debug_field;
  d->debug_field = f;

  if (upb_FieldDef_IsMap(f)) {
    jsondec_map(d, msg, f);
  } else if (upb_FieldDef_IsRepeated(f)) {
    jsondec_array(d, msg, f);
  } else if (upb_FieldDef_IsSubMessage(f)) {
    upb_Message* submsg = upb_Message_Mutable(msg, f, d->arena).msg;
    const upb_MessageDef* subm = upb_FieldDef_MessageSubDef(f);
    /* Check before decoding: the decode step writes into the submessage. */
    if (!submsg) jsondec_err(d, "Out of memory");
    jsondec_tomsg(d, submsg, subm);
  } else {
    upb_MessageValue val = jsondec_value(d, f);
    /* upb_Message_SetFieldByDef() reports failure through its return value;
     * a value that was not stored must not go unnoticed. */
    if (!upb_Message_SetFieldByDef(msg, f, val, d->arena)) {
      jsondec_err(d, "Out of memory");
    }
  }

  d->debug_field = preserved;
}
946 
/* Parses a JSON object into `msg` (of type `m`), handing each member to
 * jsondec_field(). */
static void jsondec_object(jsondec* d, upb_Message* msg,
                           const upb_MessageDef* m) {
  jsondec_objstart(d);
  for (;;) {
    if (!jsondec_objnext(d)) break;
    jsondec_field(d, msg, m);
  }
  jsondec_objend(d);
}
955 
jsondec_value(jsondec * d,const upb_FieldDef * f)956 static upb_MessageValue jsondec_value(jsondec* d, const upb_FieldDef* f) {
957   switch (upb_FieldDef_CType(f)) {
958     case kUpb_CType_Bool:
959       return jsondec_bool(d, f);
960     case kUpb_CType_Float:
961     case kUpb_CType_Double:
962       return jsondec_double(d, f);
963     case kUpb_CType_UInt32:
964     case kUpb_CType_UInt64:
965       return jsondec_uint(d, f);
966     case kUpb_CType_Int32:
967     case kUpb_CType_Int64:
968       return jsondec_int(d, f);
969     case kUpb_CType_String:
970     case kUpb_CType_Bytes:
971       return jsondec_strfield(d, f);
972     case kUpb_CType_Enum:
973       return jsondec_enum(d, f);
974     case kUpb_CType_Message:
975       return jsondec_msg(d, f);
976     default:
977       UPB_UNREACHABLE();
978   }
979 }
980 
981 /* Well-known types ***********************************************************/
982 
/* Parses exactly `digits` decimal digits starting at `*ptr`, optionally
 * followed by the literal separator `after` (may be NULL for "no separator").
 *
 * On success `*ptr` is advanced past the digits and the separator, and the
 * parsed number is returned.  Any mismatch reports "Malformed timestamp".
 * The caller must guarantee that `digits + strlen(after)` bytes are readable
 * at `*ptr`. */
static int jsondec_tsdigits(jsondec* d, const char** ptr, size_t digits,
                            const char* after) {
  const char* const begin = *ptr;
  const char* const digits_end = begin + digits;
  size_t sep_len = 0;
  uint64_t parsed;

  if (after != NULL) sep_len = strlen(after);

  UPB_ASSERT(digits <= 9); /* int can't overflow. */

  /* The number parser must consume the whole digit window. */
  if (jsondec_buftouint64(d, begin, digits_end, &parsed) != digits_end) {
    jsondec_err(d, "Malformed timestamp");
  }

  /* The expected separator, if any, must follow immediately. */
  if (sep_len != 0 && memcmp(digits_end, after, sep_len) != 0) {
    jsondec_err(d, "Malformed timestamp");
  }

  UPB_ASSERT(parsed < INT_MAX);

  *ptr = digits_end + sep_len;
  return (int)parsed;
}
1002 
/* Parses an optional fractional-seconds suffix (".<digits>") at `*ptr` and
 * returns it scaled to nanoseconds.
 *
 * If the text at `*ptr` does not start with '.', nothing is consumed and 0 is
 * returned.  More than nine fractional digits is reported as an error.  On a
 * successful parse `*ptr` is advanced past the digits. */
static int jsondec_nanos(jsondec* d, const char** ptr, const char* end) {
  const char* const start = *ptr;
  uint64_t value = 0;

  if (start != end && *start == '.') {
    const char* const frac = start + 1;
    const char* const frac_end = jsondec_buftouint64(d, frac, end, &value);
    const int ndigits = (int)(frac_end - frac);
    int i;

    if (ndigits > 9) {
      jsondec_err(d, "Too many digits for partial seconds");
    }

    /* Pad on the right to nine digits, e.g. ".5" -> 500000000. */
    for (i = ndigits; i < 9; i++) {
      value *= 10;
    }

    *ptr = frac_end;
  }

  UPB_ASSERT(value < INT_MAX);

  return (int)value;
}
1022 
/* Returns the number of days between 1970-01-01 and the given proleptic
 * Gregorian date, so jsondec_epochdays(1970, 1, 1) == 0 and earlier dates are
 * negative.
 *
 * The computation treats March as the first month of the year, so that the
 * leap day falls at the end of the (shifted) year.  All intermediate
 * arithmetic is done in uint32_t and relies on modular wrap-around. */
int jsondec_epochdays(int y, int m, int d) {
  uint32_t month = (uint32_t)m - 3;    /* March-based month. */
  uint32_t year = (uint32_t)y + 4800u; /* Offset: before min year, 400*k. */
  uint32_t days;

  /* January and February wrap around; they belong to the previous year. */
  if (month > (uint32_t)m) {
    month += 12;
    year -= 1;
  }

  days = year * 365;
  days += year / 4 - year / 100 + year / 400; /* Leap days. */
  days += (month * 62719 + 769) / 2048;       /* Days before this month. */
  days += (uint32_t)(d - 1);
  days -= 2472632; /* Rebase so that 1970-01-01 is day 0. */

  return (int)days;
}
1034 
/* Converts a broken-down UTC date and time into seconds relative to
 * 1970-01-01T00:00:00, using jsondec_epochdays() for the date part. */
static int64_t jsondec_unixtime(int y, int m, int d, int h, int min, int s) {
  const int64_t days = jsondec_epochdays(y, m, d);
  int64_t total = days * 86400;

  total += (int64_t)h * 3600;
  total += (int64_t)min * 60;
  total += s;
  return total;
}
1038 
/* Parses a timestamp from its JSON string form
 *   "YYYY-MM-DDTHH:MM:SS[.fraction](Z|+HH:MM|-HH:MM)"
 * and stores the result in field 1 (seconds) and field 2 (nanos) of `msg`.
 *
 * Syntax problems are reported as "Malformed timestamp".  An instant outside
 * 0001-01-01T00:00:00Z .. 9999-12-31T23:59:59Z (after the UTC offset has been
 * applied) is reported as "Timestamp out of range". */
static void jsondec_timestamp(jsondec* d, upb_Message* msg,
                              const upb_MessageDef* m) {
  /* Seconds since the Unix epoch for 0001-01-01T00:00:00Z and
   * 9999-12-31T23:59:59Z respectively. */
  const int64_t min_seconds = -62135596800;
  const int64_t max_seconds = 253402300799;
  upb_MessageValue seconds;
  upb_MessageValue nanos;
  upb_StringView str = jsondec_string(d);
  const char* ptr = str.data;
  const char* end = ptr + str.size;

  /* Shortest valid form is "YYYY-MM-DDTHH:MM:SSZ" (20 bytes).  This check also
   * guarantees that the fixed-width reads below stay inside the string. */
  if (str.size < 20) goto malformed;

  {
    /* 1972-01-01T01:00:00 */
    int year = jsondec_tsdigits(d, &ptr, 4, "-");
    int mon = jsondec_tsdigits(d, &ptr, 2, "-");
    int day = jsondec_tsdigits(d, &ptr, 2, "T");
    int hour = jsondec_tsdigits(d, &ptr, 2, ":");
    int min = jsondec_tsdigits(d, &ptr, 2, ":");
    int sec = jsondec_tsdigits(d, &ptr, 2, NULL);

    seconds.int64_val = jsondec_unixtime(year, mon, day, hour, min, sec);
  }

  nanos.int32_val = jsondec_nanos(d, &ptr, end);

  {
    /* [+-]08:00 or Z */
    int ofs_hour = 0;
    int ofs_min = 0;
    bool neg = false;

    if (ptr == end) goto malformed;

    switch (*ptr++) {
      case '-':
        neg = true;
        /* fallthrough */
      case '+':
        /* Exactly "HH:MM" must remain. */
        if ((end - ptr) != 5) goto malformed;
        ofs_hour = jsondec_tsdigits(d, &ptr, 2, ":");
        ofs_min = jsondec_tsdigits(d, &ptr, 2, NULL);
        ofs_min = ((ofs_hour * 60) + ofs_min) * 60;
        /* A positive offset means local time is ahead of UTC, so subtract. */
        seconds.int64_val += (neg ? ofs_min : -ofs_min);
        break;
      case 'Z':
        if (ptr != end) goto malformed;
        break;
      default:
        goto malformed;
    }
  }

  /* The four-digit year alone does not bound the result: a UTC offset can push
   * the instant past either end of the valid range. */
  if (seconds.int64_val < min_seconds || seconds.int64_val > max_seconds) {
    jsondec_err(d, "Timestamp out of range");
  }

  upb_Message_SetFieldByDef(msg, upb_MessageDef_FindFieldByNumber(m, 1),
                            seconds, d->arena);
  upb_Message_SetFieldByDef(msg, upb_MessageDef_FindFieldByNumber(m, 2), nanos,
                            d->arena);
  return;

malformed:
  jsondec_err(d, "Malformed timestamp");
}
1103 
/* Parses a duration from its JSON string form ("3s", "3.000000001s",
 * "-1.5s", ...) and stores it in field 1 (seconds) and field 2 (nanos) of
 * `msg`.
 *
 * The string must end in a single 's'; otherwise "Malformed duration" is
 * reported.  The seconds part must lie within +/- 3652500 days, otherwise
 * "Duration out of range" is reported.  For negative durations the nanos
 * value is negated as well. */
static void jsondec_duration(jsondec* d, upb_Message* msg,
                             const upb_MessageDef* m) {
  const int64_t limit = (uint64_t)3652500 * 86400;
  upb_StringView text = jsondec_string(d);
  const char* cur = text.data;
  const char* const text_end = cur + text.size;
  upb_MessageValue secs;
  upb_MessageValue frac;
  bool negative = false;
  bool has_unit;

  /* Integer part (with optional sign), then optional ".fraction". */
  cur = jsondec_buftoint64(d, cur, text_end, &secs.int64_val, &negative);
  frac.int32_val = jsondec_nanos(d, &cur, text_end);

  /* Exactly one byte, the unit suffix, must remain. */
  has_unit = (text_end - cur == 1) && (*cur == 's');
  if (!has_unit) {
    jsondec_err(d, "Malformed duration");
  }

  if (secs.int64_val > limit || secs.int64_val < -limit) {
    jsondec_err(d, "Duration out of range");
  }

  /* The sign is reported separately so that it also applies when the seconds
   * part is zero. */
  if (negative) {
    frac.int32_val = -frac.int32_val;
  }

  upb_Message_SetFieldByDef(msg, upb_MessageDef_FindFieldByNumber(m, 1), secs,
                            d->arena);
  upb_Message_SetFieldByDef(msg, upb_MessageDef_FindFieldByNumber(m, 2), frac,
                            d->arena);
}
1135 
/* Parses a JSON array into `msg`, a list-value message of type `m`.
 *
 * Field 1 of `m` is the repeated element field.  For every array element a
 * new element message is created, appended to the array, and then filled in
 * by jsondec_wellknownvalue(). */
static void jsondec_listvalue(jsondec* d, upb_Message* msg,
                              const upb_MessageDef* m) {
  const upb_FieldDef* list_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_MessageDef* elem_m = upb_FieldDef_MessageSubDef(list_f);
  const upb_MiniTable* elem_layout = upb_MessageDef_MiniTable(elem_m);
  upb_Array* list = upb_Message_Mutable(msg, list_f, d->arena).array;

  for (jsondec_arrstart(d); jsondec_arrnext(d);) {
    upb_Message* elem = upb_Message_New(elem_layout, d->arena);
    upb_MessageValue wrapped;

    /* The element is appended first and populated afterwards. */
    wrapped.msg_val = elem;
    upb_Array_Append(list, wrapped, d->arena);
    jsondec_wellknownvalue(d, elem, elem_m);
  }
  jsondec_arrend(d);
}
1153 
/* Parses a JSON object into `msg`, a struct message of type `m`.
 *
 * Field 1 of `m` is a map; the value type of its entries (entry field 2) is a
 * message that is filled in by jsondec_wellknownvalue().  Each JSON member
 * becomes one map entry keyed by the member name. */
static void jsondec_struct(jsondec* d, upb_Message* msg,
                           const upb_MessageDef* m) {
  const upb_FieldDef* map_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_MessageDef* map_entry_m = upb_FieldDef_MessageSubDef(map_f);
  const upb_FieldDef* entry_val_f =
      upb_MessageDef_FindFieldByNumber(map_entry_m, 2);
  const upb_MessageDef* elem_m = upb_FieldDef_MessageSubDef(entry_val_f);
  const upb_MiniTable* elem_layout = upb_MessageDef_MiniTable(elem_m);
  upb_Map* map = upb_Message_Mutable(msg, map_f, d->arena).map;

  for (jsondec_objstart(d); jsondec_objnext(d);) {
    upb_Message* elem = upb_Message_New(elem_layout, d->arena);
    upb_MessageValue map_key;
    upb_MessageValue map_val;

    map_key.str_val = jsondec_string(d);
    map_val.msg_val = elem;

    /* The entry is inserted first; the value message is populated after the
     * key/value separator has been consumed. */
    upb_Map_Set(map, map_key, map_val, d->arena);
    jsondec_entrysep(d);
    jsondec_wellknownvalue(d, elem, elem_m);
  }
  jsondec_objend(d);
}
1175 
/* Parses an arbitrary JSON value into `msg`, a dynamically-typed value
 * message of type `m`.  The kind of the next JSON token selects which field
 * of `m` is populated:
 *
 *   null    -> field 1 (set to 0)
 *   number  -> field 2 (double)
 *   string  -> field 3
 *   boolean -> field 4
 *   object  -> field 5 (sub-message, parsed by jsondec_struct)
 *   array   -> field 6 (sub-message, parsed by jsondec_listvalue) */
static void jsondec_wellknownvalue(jsondec* d, upb_Message* msg,
                                   const upb_MessageDef* m) {
  upb_MessageValue scalar;
  int field_number;

  switch (jsondec_peek(d)) {
    /* Composite kinds: obtaining the mutable sub-message already makes the
     * field present, so these cases return without a separate set call. */
    case JD_OBJECT: {
      /* Struct struct_value = 5; */
      const upb_FieldDef* struct_f = upb_MessageDef_FindFieldByNumber(m, 5);
      upb_Message* child = upb_Message_Mutable(msg, struct_f, d->arena).msg;
      jsondec_struct(d, child, upb_FieldDef_MessageSubDef(struct_f));
      return;
    }
    case JD_ARRAY: {
      /* ListValue list_value = 6; */
      const upb_FieldDef* list_f = upb_MessageDef_FindFieldByNumber(m, 6);
      upb_Message* child = upb_Message_Mutable(msg, list_f, d->arena).msg;
      jsondec_listvalue(d, child, upb_FieldDef_MessageSubDef(list_f));
      return;
    }

    /* Scalar kinds: parse into `scalar`, then store once below. */
    case JD_NULL:
      /* NullValue null_value = 1; */
      field_number = 1;
      scalar.int32_val = 0;
      jsondec_null(d);
      break;
    case JD_NUMBER:
      /* double number_value = 2; */
      field_number = 2;
      scalar.double_val = jsondec_number(d);
      break;
    case JD_STRING:
      /* string string_value = 3; */
      field_number = 3;
      scalar.str_val = jsondec_string(d);
      break;
    case JD_TRUE:
      /* bool bool_value = 4; */
      field_number = 4;
      scalar.bool_val = true;
      jsondec_true(d);
      break;
    case JD_FALSE:
      /* bool bool_value = 4; */
      field_number = 4;
      scalar.bool_val = false;
      jsondec_false(d);
      break;
    default:
      UPB_UNREACHABLE();
  }

  upb_Message_SetFieldByDef(msg,
                            upb_MessageDef_FindFieldByNumber(m, field_number),
                            scalar, d->arena);
}
1230 
jsondec_mask(jsondec * d,const char * buf,const char * end)1231 static upb_StringView jsondec_mask(jsondec* d, const char* buf,
1232                                    const char* end) {
1233   /* FieldMask fields grow due to inserted '_' characters, so we can't do the
1234    * transform in place. */
1235   const char* ptr = buf;
1236   upb_StringView ret;
1237   char* out;
1238 
1239   ret.size = end - ptr;
1240   while (ptr < end) {
1241     ret.size += (*ptr >= 'A' && *ptr <= 'Z');
1242     ptr++;
1243   }
1244 
1245   out = upb_Arena_Malloc(d->arena, ret.size);
1246   ptr = buf;
1247   ret.data = out;
1248 
1249   while (ptr < end) {
1250     char ch = *ptr++;
1251     if (ch >= 'A' && ch <= 'Z') {
1252       *out++ = '_';
1253       *out++ = ch + 32;
1254     } else if (ch == '_') {
1255       jsondec_err(d, "field mask may not contain '_'");
1256     } else {
1257       *out++ = ch;
1258     }
1259   }
1260 
1261   return ret;
1262 }
1263 
/* Parses a field mask from its JSON string form, a comma-separated list of
 * paths, into `msg`.  Each path is converted by jsondec_mask() and appended to
 * the repeated string field 1 of `m`. */
static void jsondec_fieldmask(jsondec* d, upb_Message* msg,
                              const upb_MessageDef* m) {
  /* repeated string paths = 1; */
  const upb_FieldDef* paths_f = upb_MessageDef_FindFieldByNumber(m, 1);
  upb_Array* paths = upb_Message_Mutable(msg, paths_f, d->arena).array;
  upb_StringView text = jsondec_string(d);
  const char* cur = text.data;
  const char* const text_end = cur + text.size;

  while (cur < text_end) {
    const char* comma = memchr(cur, ',', text_end - cur);
    const char* path_end = comma ? comma : text_end;
    upb_MessageValue path;

    path.str_val = jsondec_mask(d, cur, path_end);
    upb_Array_Append(paths, path, d->arena);

    /* Continue after the comma, or stop if this was the last path. */
    cur = comma ? comma + 1 : text_end;
  }
}
1286 
/* Parses one member of an Any object (other than "@type") into `msg`, the
 * payload message of type `m`.
 *
 * For regular types: {"@type": "[user type]", "f1": <V1>, "f2": <V2>}
 * where f1, f2, etc. are the normal fields of this type.
 *
 * For well-known types: {"@type": "[well-known type]", "value": <X>}
 * where <X> is whatever encoding the WKT normally uses. */
static void jsondec_anyfield(jsondec* d, upb_Message* msg,
                             const upb_MessageDef* m) {
  upb_StringView key;

  if (upb_MessageDef_WellKnownType(m) == kUpb_WellKnown_Unspecified) {
    jsondec_field(d, msg, m);
    return;
  }

  /* Well-known payload: the only permitted key is "value". */
  key = jsondec_string(d);
  jsondec_entrysep(d);
  if (!jsondec_streql(key, "value")) {
    jsondec_err(d, "Key for well-known type must be 'value'");
  }
  jsondec_wellknown(d, msg, m);
}
1304 
/* Parses the string value of an "@type" member, stores it in field 1 of `msg`
 * (an Any message of type `m`), and resolves the message type it names.
 *
 * The type name is the part of the URL after the last '/'.  It is looked up in
 * the decoder's symbol table and the matching message definition is returned.
 * A URL with no '/' or with nothing before the last '/' is rejected, as is a
 * type name that cannot be found. */
static const upb_MessageDef* jsondec_typeurl(jsondec* d, upb_Message* msg,
                                             const upb_MessageDef* m) {
  const upb_FieldDef* url_f = upb_MessageDef_FindFieldByNumber(m, 1);
  upb_StringView url = jsondec_string(d);
  const char* const url_begin = url.data;
  const char* const url_end = url_begin + url.size;
  const char* slash = url_end;
  const upb_MessageDef* resolved;
  upb_MessageValue url_val;

  /* The URL is stored before it is validated. */
  url_val.str_val = url;
  upb_Message_SetFieldByDef(msg, url_f, url_val, d->arena);

  /* Find message name after the last '/' */
  while (slash > url_begin) {
    slash--;
    if (*slash == '/') break;
  }

  if (slash == url_begin || slash == url_end) {
    jsondec_err(d, "Type url must have at least one '/' and non-empty host");
  }

  slash++; /* Step over the '/' to the first byte of the type name. */
  resolved = upb_DefPool_FindMessageByNameWithSize(d->symtab, slash,
                                                   url_end - slash);

  if (!resolved) {
    jsondec_err(d, "Type was not found");
  }

  return resolved;
}
1334 
/* Parses a JSON object into `msg`, an Any message of type `m`.
 *
 * The object must contain an "@type" member naming the payload type; the
 * remaining members describe the payload (see jsondec_anyfield()).  The
 * payload is decoded into a temporary message, serialized with upb_Encode(),
 * and the resulting bytes are stored in field 2 of `msg`.  Field 1 is set by
 * jsondec_typeurl().
 *
 * Because "@type" need not come first, members seen before it are skipped on
 * the first pass and re-parsed from a copy once the payload type is known.
 *
 * Errors are reported for a missing "@type", for allocation failure, and for
 * a payload that cannot be serialized. */
static void jsondec_any(jsondec* d, upb_Message* msg, const upb_MessageDef* m) {
  /* string type_url = 1;
   * bytes value = 2; */
  const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumber(m, 2);
  upb_Message* any_msg;
  const upb_MessageDef* any_m = NULL;
  const char* pre_type_data = NULL;
  const char* pre_type_end = NULL;
  upb_MessageValue encoded;

  jsondec_objstart(d);

  /* Scan looking for "@type", which is not necessarily first. */
  while (!any_m && jsondec_objnext(d)) {
    const char* start = d->ptr;
    upb_StringView name = jsondec_string(d);
    jsondec_entrysep(d);
    if (jsondec_streql(name, "@type")) {
      any_m = jsondec_typeurl(d, msg, m);
      if (pre_type_data) {
        /* Back up to the comma that separates the skipped members from
         * "@type"; it marks the end of the region to replay. */
        pre_type_end = start;
        while (*pre_type_end != ',') pre_type_end--;
      }
    } else {
      /* Remember where the first skipped member starts. */
      if (!pre_type_data) pre_type_data = start;
      jsondec_skipval(d);
    }
  }

  if (!any_m) {
    jsondec_err(d, "Any object didn't contain a '@type' field");
  }

  const upb_MiniTable* any_layout = upb_MessageDef_MiniTable(any_m);
  any_msg = upb_Message_New(any_layout, d->arena);

  if (pre_type_data) {
    /* Replay the members that preceded "@type": copy them into a scratch
     * buffer, terminate it with '}' so the object iterator stops there, and
     * temporarily point the decoder at the copy. */
    size_t len = pre_type_end - pre_type_data + 1;
    char* tmp = upb_Arena_Malloc(d->arena, len);
    const char* saved_ptr = d->ptr;
    const char* saved_end = d->end;
    if (!tmp) {
      jsondec_err(d, "Out of memory");
    }
    memcpy(tmp, pre_type_data, len - 1);
    tmp[len - 1] = '}';
    d->ptr = tmp;
    d->end = tmp + len;
    d->is_first = true;
    while (jsondec_objnext(d)) {
      jsondec_anyfield(d, any_msg, any_m);
    }
    d->ptr = saved_ptr;
    d->end = saved_end;
  }

  /* Members that follow "@type" in the original input. */
  while (jsondec_objnext(d)) {
    jsondec_anyfield(d, any_msg, any_m);
  }

  jsondec_objend(d);

  upb_EncodeStatus status =
      upb_Encode(any_msg, upb_MessageDef_MiniTable(any_m), 0, d->arena,
                 (char**)&encoded.str_val.data, &encoded.str_val.size);
  if (status != kUpb_EncodeStatus_Ok) {
    /* Fail the decode rather than store an undefined `encoded` value when
     * assertions are compiled out. */
    jsondec_err(d, "Error encoding Any value");
  }
  upb_Message_SetFieldByDef(msg, value_f, encoded, d->arena);
}
1401 
/* Parses a wrapper message: the JSON value is decoded as field 1 of `m` and
 * stored in that field of `msg`. */
static void jsondec_wrapper(jsondec* d, upb_Message* msg,
                            const upb_MessageDef* m) {
  const upb_FieldDef* inner_f = upb_MessageDef_FindFieldByNumber(m, 1);
  upb_MessageValue inner;

  inner = jsondec_value(d, inner_f);
  upb_Message_SetFieldByDef(msg, inner_f, inner, d->arena);
}
1408 
/* Decodes the next JSON value into `msg` using the special JSON encoding of
 * the well-known type reported for `m`.  `m` must be a well-known type; any
 * other kind hits UPB_UNREACHABLE(). */
static void jsondec_wellknown(jsondec* d, upb_Message* msg,
                              const upb_MessageDef* m) {
  switch (upb_MessageDef_WellKnownType(m)) {
    /* Wrapper types all share the same "bare value" encoding. */
    case kUpb_WellKnown_BoolValue:
    case kUpb_WellKnown_BytesValue:
    case kUpb_WellKnown_StringValue:
    case kUpb_WellKnown_UInt32Value:
    case kUpb_WellKnown_Int32Value:
    case kUpb_WellKnown_UInt64Value:
    case kUpb_WellKnown_Int64Value:
    case kUpb_WellKnown_FloatValue:
    case kUpb_WellKnown_DoubleValue:
      jsondec_wrapper(d, msg, m);
      return;

    case kUpb_WellKnown_Struct:
      jsondec_struct(d, msg, m);
      return;
    case kUpb_WellKnown_ListValue:
      jsondec_listvalue(d, msg, m);
      return;
    case kUpb_WellKnown_Value:
      jsondec_wellknownvalue(d, msg, m);
      return;

    case kUpb_WellKnown_Timestamp:
      jsondec_timestamp(d, msg, m);
      return;
    case kUpb_WellKnown_Duration:
      jsondec_duration(d, msg, m);
      return;

    case kUpb_WellKnown_FieldMask:
      jsondec_fieldmask(d, msg, m);
      return;
    case kUpb_WellKnown_Any:
      jsondec_any(d, msg, m);
      return;

    default:
      UPB_UNREACHABLE();
  }
}
1448 
/* Runs the decode of `msg` (type `m`) under the decoder's jump buffer.
 * Returns true if jsondec_tomsg() ran to completion, and false if control came
 * back through the UPB_SETJMP() point instead. */
static bool upb_JsonDecoder_Decode(jsondec* const d, upb_Message* const msg,
                                   const upb_MessageDef* const m) {
  if (UPB_SETJMP(d->err) == 0) {
    jsondec_tomsg(d, msg, m);
    return true;
  }

  return false;
}
1456 
/* Decodes the JSON text `buf[0..size)` into `msg`, a message of type `m`.
 *
 * `symtab`, `options`, `arena` and `status` are stored in the decoder state
 * for use during the decode.  Returns true on success and false on failure.
 * An empty input (`size == 0`) returns true without touching `msg`. */
bool upb_JsonDecode(const char* buf, size_t size, upb_Message* msg,
                    const upb_MessageDef* m, const upb_DefPool* symtab,
                    int options, upb_Arena* arena, upb_Status* status) {
  jsondec d;

  if (size == 0) {
    return true;
  }

  /* Input window and position tracking. */
  d.ptr = buf;
  d.end = buf + size;
  d.line = 1;
  d.line_begin = buf;
  d.is_first = false;

  /* Caller-provided context. */
  d.arena = arena;
  d.symtab = symtab;
  d.options = options;
  d.status = status;

  /* Remaining decoder state. */
  d.depth = 64;
  d.debug_field = NULL;

  return upb_JsonDecoder_Decode(&d, msg, m);
}
1478