1 /*
2 * Copyright (c) 2009-2021, Google LLC
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Google LLC nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include "upb/json/decode.h"
29
30 #include <errno.h>
31 #include <float.h>
32 #include <inttypes.h>
33 #include <limits.h>
34 #include <math.h>
35 #include <stdlib.h>
36 #include <string.h>
37
38 #include "upb/collections/map.h"
39 #include "upb/lex/atoi.h"
40 #include "upb/lex/unicode.h"
41 #include "upb/reflection/message.h"
42 #include "upb/wire/encode.h"
43
44 // Must be last.
45 #include "upb/port/def.inc"
46
47 typedef struct {
48 const char *ptr, *end;
49 upb_Arena* arena; /* TODO: should we have a tmp arena for tmp data? */
50 const upb_DefPool* symtab;
51 int depth;
52 upb_Status* status;
53 jmp_buf err;
54 int line;
55 const char* line_begin;
56 bool is_first;
57 int options;
58 const upb_FieldDef* debug_field;
59 } jsondec;
60
/* Token categories reported by jsondec_rawpeek()/jsondec_peek(). */
enum {
  JD_OBJECT, /* '{' */
  JD_ARRAY,  /* '[' */
  JD_STRING, /* '"' */
  JD_NUMBER, /* '-' or a decimal digit */
  JD_TRUE,   /* 't' */
  JD_FALSE,  /* 'f' */
  JD_NULL    /* 'n' */
};
62
63 /* Forward declarations of mutually-recursive functions. */
64 static void jsondec_wellknown(jsondec* d, upb_Message* msg,
65 const upb_MessageDef* m);
66 static upb_MessageValue jsondec_value(jsondec* d, const upb_FieldDef* f);
67 static void jsondec_wellknownvalue(jsondec* d, upb_Message* msg,
68 const upb_MessageDef* m);
69 static void jsondec_object(jsondec* d, upb_Message* msg,
70 const upb_MessageDef* m);
71
/* Returns true iff `str` holds exactly the bytes of the C string `lit`. */
static bool jsondec_streql(upb_StringView str, const char* lit) {
  const size_t lit_len = strlen(lit);
  if (str.size != lit_len) return false;
  return memcmp(str.data, lit, lit_len) == 0;
}
75
jsondec_isnullvalue(const upb_FieldDef * f)76 static bool jsondec_isnullvalue(const upb_FieldDef* f) {
77 return upb_FieldDef_CType(f) == kUpb_CType_Enum &&
78 strcmp(upb_EnumDef_FullName(upb_FieldDef_EnumSubDef(f)),
79 "google.protobuf.NullValue") == 0;
80 }
81
jsondec_isvalue(const upb_FieldDef * f)82 static bool jsondec_isvalue(const upb_FieldDef* f) {
83 return (upb_FieldDef_CType(f) == kUpb_CType_Message &&
84 upb_MessageDef_WellKnownType(upb_FieldDef_MessageSubDef(f)) ==
85 kUpb_WellKnown_Value) ||
86 jsondec_isnullvalue(f);
87 }
88
jsondec_err(jsondec * d,const char * msg)89 UPB_NORETURN static void jsondec_err(jsondec* d, const char* msg) {
90 upb_Status_SetErrorFormat(d->status, "Error parsing JSON @%d:%d: %s", d->line,
91 (int)(d->ptr - d->line_begin), msg);
92 UPB_LONGJMP(d->err, 1);
93 }
94
95 UPB_PRINTF(2, 3)
jsondec_errf(jsondec * d,const char * fmt,...)96 UPB_NORETURN static void jsondec_errf(jsondec* d, const char* fmt, ...) {
97 va_list argp;
98 upb_Status_SetErrorFormat(d->status, "Error parsing JSON @%d:%d: ", d->line,
99 (int)(d->ptr - d->line_begin));
100 va_start(argp, fmt);
101 upb_Status_VAppendErrorFormat(d->status, fmt, argp);
102 va_end(argp);
103 UPB_LONGJMP(d->err, 1);
104 }
105
/* Advances d->ptr past spaces, tabs, CRs and newlines, updating the line
 * bookkeeping at every '\n'.  Returns with d->ptr on the first other byte;
 * reaching the end of input instead is an error. */
static void jsondec_skipws(jsondec* d) {
  for (; d->ptr != d->end; d->ptr++) {
    const char c = *d->ptr;
    if (c == '\n') {
      d->line++;
      d->line_begin = d->ptr;
    } else if (c != '\r' && c != '\t' && c != ' ') {
      return; /* First non-whitespace byte; leave it unconsumed. */
    }
  }
  jsondec_err(d, "Unexpected EOF");
}
124
/* Consumes the next byte and returns true iff it equals `ch`; otherwise (or at
 * end of input) leaves d->ptr untouched and returns false. */
static bool jsondec_tryparsech(jsondec* d, char ch) {
  if (d->ptr != d->end && *d->ptr == ch) {
    d->ptr++;
    return true;
  }
  return false;
}
130
/* Requires the unread input to begin with the literal `lit` and consumes it;
 * raises an "Expected" error otherwise. */
static void jsondec_parselit(jsondec* d, const char* lit) {
  const size_t want = strlen(lit);
  const size_t remaining = d->end - d->ptr;
  const bool matches = remaining >= want && memcmp(d->ptr, lit, want) == 0;

  if (!matches) {
    jsondec_errf(d, "Expected: '%s'", lit);
  }
  d->ptr += want;
}
139
/* Skips whitespace, then requires and consumes the single character `ch`. */
static void jsondec_wsch(jsondec* d, char ch) {
  jsondec_skipws(d);
  if (jsondec_tryparsech(d, ch)) return;
  jsondec_errf(d, "Expected: '%c'", ch);
}
146
/* Each of these consumes exactly one JSON keyword at d->ptr, or errors. */
static void jsondec_true(jsondec* d) {
  jsondec_parselit(d, "true");
}

static void jsondec_false(jsondec* d) {
  jsondec_parselit(d, "false");
}

static void jsondec_null(jsondec* d) {
  jsondec_parselit(d, "null");
}
150
/* Consumes the ':' between an object key and its value, allowing leading
 * whitespace. */
static void jsondec_entrysep(jsondec* d) {
  jsondec_skipws(d); /* Errors out on EOF before the separator. */
  jsondec_parselit(d, ":");
}
155
/* Classifies the byte at d->ptr as one of the JD_* token kinds without
 * consuming it.  Does not skip whitespace and does not check for end of
 * input; any byte that cannot start a JSON value is an error. */
static int jsondec_rawpeek(jsondec* d) {
  const char c = *d->ptr;

  if (c == '{') return JD_OBJECT;
  if (c == '[') return JD_ARRAY;
  if (c == '"') return JD_STRING;
  if (c == '-' || (c >= '0' && c <= '9')) return JD_NUMBER;
  if (c == 't') return JD_TRUE;
  if (c == 'f') return JD_FALSE;
  if (c == 'n') return JD_NULL;

  jsondec_errf(d, "Unexpected character: '%c'", *d->ptr);
}
186
187 /* JSON object/array **********************************************************/
188
/* These are used like so:
 *
 * jsondec_objstart(d);
 * while (jsondec_objnext(d)) {
 *   ...
 * }
 * jsondec_objend(d); */
196
/* Skips whitespace and reports the kind (JD_*) of the upcoming token without
 * consuming it. */
static int jsondec_peek(jsondec* d) {
  int kind;
  jsondec_skipws(d);
  kind = jsondec_rawpeek(d);
  return kind;
}
201
/* Enters one level of nesting: spends one unit of the depth budget (erroring
 * when it goes negative) and marks the next element as the first. */
static void jsondec_push(jsondec* d) {
  d->depth--;
  if (d->depth < 0) {
    jsondec_err(d, "Recursion limit exceeded");
  }
  d->is_first = true;
}
208
/* Shared iteration step for arrays and objects.  Returns false, consuming
 * nothing but whitespace, when the next byte is `end_ch`.  Otherwise consumes
 * the ',' separator (required before every element but the first) and returns
 * true. */
static bool jsondec_seqnext(jsondec* d, char end_ch) {
  const bool was_first = d->is_first;
  d->is_first = false;

  jsondec_skipws(d);
  if (*d->ptr != end_ch) {
    if (!was_first) jsondec_parselit(d, ",");
    return true;
  }
  return false;
}
217
/* Begins an array: accounts for the new nesting level, then consumes '['. */
static void jsondec_arrstart(jsondec* d) {
  jsondec_push(d);
  jsondec_wsch(d, '[');
}
222
/* Ends an array: returns the nesting level taken by jsondec_arrstart(), then
 * consumes ']'. */
static void jsondec_arrend(jsondec* d) {
  d->depth += 1;
  jsondec_wsch(d, ']');
}
227
/* Returns true while another array element follows (see jsondec_seqnext()). */
static bool jsondec_arrnext(jsondec* d) {
  return jsondec_seqnext(d, ']');
}
229
/* Begins an object: accounts for the new nesting level, then consumes '{'. */
static void jsondec_objstart(jsondec* d) {
  jsondec_push(d);
  jsondec_wsch(d, '{');
}
234
/* Ends an object: returns the nesting level taken by jsondec_objstart(), then
 * consumes '}'. */
static void jsondec_objend(jsondec* d) {
  d->depth += 1;
  jsondec_wsch(d, '}');
}
239
/* Returns true while another object member follows, after verifying that the
 * member begins with a string key. */
static bool jsondec_objnext(jsondec* d) {
  if (jsondec_seqnext(d, '}')) {
    if (jsondec_peek(d) == JD_STRING) return true;
    jsondec_err(d, "Object must start with string");
  }
  return false;
}
247
248 /* JSON number ****************************************************************/
249
/* Consumes a (possibly empty) run of decimal digits.  Returns true iff at
 * least one digit was consumed. */
static bool jsondec_tryskipdigits(jsondec* d) {
  const char* const begin = d->ptr;

  while (d->ptr < d->end && *d->ptr >= '0' && *d->ptr <= '9') {
    d->ptr++;
  }

  return d->ptr != begin;
}
262
/* Consumes one or more decimal digits; errors if there are none. */
static void jsondec_skipdigits(jsondec* d) {
  if (jsondec_tryskipdigits(d)) return;
  jsondec_err(d, "Expected one or more digits");
}
268
/* Parses the JSON number starting at d->ptr and returns it as a double.
 *
 * The caller must already have peeked a JD_NUMBER token.  The number's syntax
 * is validated against the JSON grammar first, advancing d->ptr past it; the
 * validated text is then converted with strtod().  Errors (bad syntax, value
 * outside the finite double range, out of memory) are raised through
 * jsondec_err() and do not return. */
static double jsondec_number(jsondec* d) {
  const char* start = d->ptr;

  assert(jsondec_rawpeek(d) == JD_NUMBER);

  /* Skip over the syntax of a number, as specified by JSON. */
  if (*d->ptr == '-') d->ptr++;

  if (jsondec_tryparsech(d, '0')) {
    if (jsondec_tryskipdigits(d)) {
      jsondec_err(d, "number cannot have leading zero");
    }
  } else {
    jsondec_skipdigits(d);
  }

  if (d->ptr == d->end) goto parse;
  if (jsondec_tryparsech(d, '.')) {
    jsondec_skipdigits(d);
  }
  if (d->ptr == d->end) goto parse;

  if (*d->ptr == 'e' || *d->ptr == 'E') {
    d->ptr++;
    if (d->ptr == d->end) {
      jsondec_err(d, "Unexpected EOF in number");
    }
    if (*d->ptr == '+' || *d->ptr == '-') {
      d->ptr++;
    }
    jsondec_skipdigits(d);
  }

parse:
  /* Having verified the syntax of a JSON number, use strtod() to parse
   * (strtod() accepts a superset of JSON syntax). */
  errno = 0;
  {
    /* strtod() requires a NUL-terminated string, but the input buffer is only
     * delimited by d->end.  Parse a terminated copy of exactly the validated
     * bytes so strtod() can neither read past the input nor consume
     * characters (e.g. the "x10" of "0x10") that are not part of the JSON
     * number. */
    char stack_buf[64];
    const size_t len = (size_t)(d->ptr - start);
    char* text = stack_buf;
    char* end;
    double val;

    if (len >= sizeof(stack_buf)) {
      /* Unusually long number: fall back to arena storage. */
      text = upb_Arena_Realloc(d->arena, NULL, 0, len + 1);
      if (!text) jsondec_err(d, "Out of memory");
    }
    memcpy(text, start, len);
    text[len] = '\0';

    val = strtod(text, &end);
    assert(end == text + len);

    /* errno == ERANGE is deliberately not treated as an error: the
     * min/max-val conformance tests currently fail if it is checked.  It is
     * unclear whether the tests or strtod() are at fault; investigate
     * further. */

    if (val > DBL_MAX || val < -DBL_MAX) {
      jsondec_err(d, "Number out of range");
    }

    return val;
  }
}
327
328 /* JSON string ****************************************************************/
329
/* Consumes the character following a backslash and returns the byte that the
 * two-character escape stands for.  "\u" escapes are not handled here. */
static char jsondec_escape(jsondec* d) {
  const char code = *d->ptr++;

  if (code == '"') return '\"';
  if (code == '\\') return '\\';
  if (code == '/') return '/';
  if (code == 'b') return '\b';
  if (code == 'f') return '\f';
  if (code == 'n') return '\n';
  if (code == 'r') return '\r';
  if (code == 't') return '\t';

  jsondec_err(d, "Invalid escape char");
}
352
/* Consumes exactly four hex digits and returns the 16-bit value they encode.
 * Errors if fewer than four bytes remain or any of them is not a hex digit. */
static uint32_t jsondec_codepoint(jsondec* d) {
  uint32_t cp = 0;
  int i;

  if (d->end - d->ptr < 4) {
    jsondec_err(d, "EOF inside string");
  }

  for (i = 0; i < 4; i++) {
    const char ch = *d->ptr++;
    uint32_t nibble;

    if (ch >= '0' && ch <= '9') {
      nibble = ch - '0';
    } else if (ch >= 'a' && ch <= 'f') {
      nibble = ch - 'a' + 10;
    } else if (ch >= 'A' && ch <= 'F') {
      nibble = ch - 'A' + 10;
    } else {
      jsondec_err(d, "Invalid hex digit");
    }
    cp = (cp << 4) | nibble;
  }

  return cp;
}
378
379 /* Parses a \uXXXX unicode escape (possibly a surrogate pair). */
jsondec_unicode(jsondec * d,char * out)380 static size_t jsondec_unicode(jsondec* d, char* out) {
381 uint32_t cp = jsondec_codepoint(d);
382 if (upb_Unicode_IsHigh(cp)) {
383 /* Surrogate pair: two 16-bit codepoints become a 32-bit codepoint. */
384 jsondec_parselit(d, "\\u");
385 uint32_t low = jsondec_codepoint(d);
386 if (!upb_Unicode_IsLow(low)) jsondec_err(d, "Invalid low surrogate");
387 cp = upb_Unicode_FromPair(cp, low);
388 } else if (upb_Unicode_IsLow(cp)) {
389 jsondec_err(d, "Unpaired low surrogate");
390 }
391
392 /* Write to UTF-8 */
393 int bytes = upb_Unicode_ToUTF8(cp, out);
394 if (bytes == 0) jsondec_err(d, "Invalid codepoint");
395 return bytes;
396 }
397
/* Grows the string buffer described by [*buf, *buf_end) to twice its capacity
 * (at least 8 bytes), preserving the bytes in [*buf, *end).  All three
 * pointers are updated to refer to the new storage. */
static void jsondec_resize(jsondec* d, char** buf, char** end, char** buf_end) {
  const size_t used = *end - *buf;
  const size_t old_cap = *buf_end - *buf;
  const size_t new_cap = UPB_MAX(8, 2 * old_cap);

  *buf = upb_Arena_Realloc(d->arena, *buf, used, new_cap);
  if (*buf == NULL) {
    jsondec_err(d, "Out of memory");
  }

  *buf_end = *buf + new_cap;
  *end = *buf + used;
}
409
jsondec_string(jsondec * d)410 static upb_StringView jsondec_string(jsondec* d) {
411 char* buf = NULL;
412 char* end = NULL;
413 char* buf_end = NULL;
414
415 jsondec_skipws(d);
416
417 if (*d->ptr++ != '"') {
418 jsondec_err(d, "Expected string");
419 }
420
421 while (d->ptr < d->end) {
422 char ch = *d->ptr++;
423
424 if (end == buf_end) {
425 jsondec_resize(d, &buf, &end, &buf_end);
426 }
427
428 switch (ch) {
429 case '"': {
430 upb_StringView ret;
431 ret.data = buf;
432 ret.size = end - buf;
433 *end = '\0'; /* Needed for possible strtod(). */
434 return ret;
435 }
436 case '\\':
437 if (d->ptr == d->end) goto eof;
438 if (*d->ptr == 'u') {
439 d->ptr++;
440 if (buf_end - end < 4) {
441 /* Allow space for maximum-sized codepoint (4 bytes). */
442 jsondec_resize(d, &buf, &end, &buf_end);
443 }
444 end += jsondec_unicode(d, end);
445 } else {
446 *end++ = jsondec_escape(d);
447 }
448 break;
449 default:
450 if ((unsigned char)*d->ptr < 0x20) {
451 jsondec_err(d, "Invalid char in JSON string");
452 }
453 *end++ = ch;
454 break;
455 }
456 }
457
458 eof:
459 jsondec_err(d, "EOF inside string");
460 }
461
/* Parses and discards one complete JSON value of any kind, recursing into
 * arrays and objects. */
static void jsondec_skipval(jsondec* d) {
  const int kind = jsondec_peek(d);

  switch (kind) {
    case JD_NULL:
      jsondec_null(d);
      break;
    case JD_TRUE:
      jsondec_true(d);
      break;
    case JD_FALSE:
      jsondec_false(d);
      break;
    case JD_NUMBER:
      jsondec_number(d);
      break;
    case JD_STRING:
      jsondec_string(d);
      break;
    case JD_ARRAY:
      jsondec_arrstart(d);
      while (jsondec_arrnext(d)) {
        jsondec_skipval(d);
      }
      jsondec_arrend(d);
      break;
    case JD_OBJECT:
      jsondec_objstart(d);
      while (jsondec_objnext(d)) {
        jsondec_string(d); /* Key. */
        jsondec_entrysep(d);
        jsondec_skipval(d); /* Value. */
      }
      jsondec_objend(d);
      break;
  }
}
497
498 /* Base64 decoding for bytes fields. ******************************************/
499
/* Maps one base64 character to its 6-bit value.
 *
 * Both the standard alphabet ('+', '/') and the URL-safe variant ('-', '_')
 * are accepted.  Any other byte, including '=' and every byte >= 0x80, yields
 * (unsigned int)-1, i.e. all bits set, so callers that OR shifted results
 * together can detect a bad character from the sign of the combined value. */
static unsigned int jsondec_base64_tablelookup(const char ch) {
  /* Table includes the normal base64 chars plus the URL-safe variant. */
  static const signed char table[256] = {
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       62 /*+*/, -1,       62 /*-*/, -1,       63 /*/ */, 52 /*0*/,
      53 /*1*/, 54 /*2*/, 55 /*3*/, 56 /*4*/, 57 /*5*/, 58 /*6*/,  59 /*7*/,
      60 /*8*/, 61 /*9*/, -1,       -1,       -1,       -1,        -1,
      -1,       -1,       0 /*A*/,  1 /*B*/,  2 /*C*/,  3 /*D*/,   4 /*E*/,
      5 /*F*/,  6 /*G*/,  7 /*H*/,  8 /*I*/,  9 /*J*/,  10 /*K*/,  11 /*L*/,
      12 /*M*/, 13 /*N*/, 14 /*O*/, 15 /*P*/, 16 /*Q*/, 17 /*R*/,  18 /*S*/,
      19 /*T*/, 20 /*U*/, 21 /*V*/, 22 /*W*/, 23 /*X*/, 24 /*Y*/,  25 /*Z*/,
      -1,       -1,       -1,       -1,       63 /*_*/, -1,        26 /*a*/,
      27 /*b*/, 28 /*c*/, 29 /*d*/, 30 /*e*/, 31 /*f*/, 32 /*g*/,  33 /*h*/,
      34 /*i*/, 35 /*j*/, 36 /*k*/, 37 /*l*/, 38 /*m*/, 39 /*n*/,  40 /*o*/,
      41 /*p*/, 42 /*q*/, 43 /*r*/, 44 /*s*/, 45 /*t*/, 46 /*u*/,  47 /*v*/,
      48 /*w*/, 49 /*x*/, 50 /*y*/, 51 /*z*/, -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1};

  /* Index through unsigned char: where plain char is signed, converting a
   * byte >= 0x80 straight to unsigned would give a huge index far outside
   * the table. */
  const unsigned char index = (unsigned char)ch;

  /* Sign-extend return value so high bit will be set on any unexpected char. */
  return (unsigned int)(int)table[index];
}
544
/* Decodes a trailing base64 group of 2 or 3 characters in [ptr, end) into 1 or
 * 2 bytes at `out` and returns the advanced output pointer.  Any other group
 * length, or an invalid character, is reported as corrupt base64. */
static char* jsondec_partialbase64(jsondec* d, const char* ptr, const char* end,
                                   char* out) {
  const ptrdiff_t remaining = end - ptr;
  int32_t val = -1;

  if (remaining == 2) {
    val = jsondec_base64_tablelookup(ptr[0]) << 18 |
          jsondec_base64_tablelookup(ptr[1]) << 12;
    out[0] = val >> 16;
    out += 1;
  } else if (remaining == 3) {
    val = jsondec_base64_tablelookup(ptr[0]) << 18 |
          jsondec_base64_tablelookup(ptr[1]) << 12 |
          jsondec_base64_tablelookup(ptr[2]) << 6;
    out[0] = val >> 16;
    out[1] = (val >> 8) & 0xff;
    out += 2;
  }

  /* Still -1 for an unsupported length; negative after a bad lookup. */
  if (val < 0) {
    jsondec_err(d, "Corrupt base64");
  }

  return out;
}
572
/* Base64-decodes `str` in place and returns the number of decoded bytes.
 * Trailing '=' padding is accepted but not required. */
static size_t jsondec_base64(jsondec* d, upb_StringView str) {
  /* We decode in place. This is safe because this is a new buffer (not
   * aliasing the input) and because base64 decoding shrinks 4 bytes into 3. */
  char* out = (char*)str.data;
  const char* in = str.data;
  const char* in_end = in + str.size;
  const char* quads_end = in + (str.size & -4); /* Whole groups of 4 only. */

  while (in < quads_end) {
    int val = jsondec_base64_tablelookup(in[0]) << 18 |
              jsondec_base64_tablelookup(in[1]) << 12 |
              jsondec_base64_tablelookup(in[2]) << 6 |
              jsondec_base64_tablelookup(in[3]) << 0;

    if (val < 0) {
      /* Junk chars or padding. Remove trailing padding, if any. */
      if (in_end - in == 4 && in[3] == '=') {
        in_end -= (in[2] == '=') ? 2 : 1;
      }
      break;
    }

    out[0] = val >> 16;
    out[1] = (val >> 8) & 0xff;
    out[2] = val & 0xff;

    in += 4;
    out += 3;
  }

  if (in < in_end) {
    /* Process remaining chars. We do not require padding. */
    out = jsondec_partialbase64(d, in, in_end, out);
  }

  return out - str.data;
}
611
612 /* Low-level integer parsing **************************************************/
613
/* Wrapper around upb_BufToUint64() that turns a NULL result into an "Integer
 * overflow" decode error.  Returns the pointer upb_BufToUint64() returned. */
static const char* jsondec_buftouint64(jsondec* d, const char* ptr,
                                       const char* end, uint64_t* val) {
  const char* stop = upb_BufToUint64(ptr, end, val);
  if (stop == NULL) {
    jsondec_err(d, "Integer overflow");
  }
  return stop;
}
620
/* Wrapper around upb_BufToInt64() that turns a NULL result into an "Integer
 * overflow" decode error.  Returns the pointer upb_BufToInt64() returned. */
static const char* jsondec_buftoint64(jsondec* d, const char* ptr,
                                      const char* end, int64_t* val,
                                      bool* is_neg) {
  const char* stop = upb_BufToInt64(ptr, end, val, is_neg);
  if (stop == NULL) {
    jsondec_err(d, "Integer overflow");
  }
  return stop;
}
628
/* Converts the whole of `str` to a uint64_t; errors if the conversion stops
 * before the end of the string. */
static uint64_t jsondec_strtouint64(jsondec* d, upb_StringView str) {
  uint64_t parsed;
  const char* const str_end = str.data + str.size;
  const char* const stop = jsondec_buftouint64(d, str.data, str_end, &parsed);

  if (stop != str_end) {
    jsondec_err(d, "Non-number characters in quoted integer");
  }
  return parsed;
}
637
/* Converts the whole of `str` to an int64_t; errors if the conversion stops
 * before the end of the string. */
static int64_t jsondec_strtoint64(jsondec* d, upb_StringView str) {
  int64_t parsed;
  const char* const str_end = str.data + str.size;
  const char* const stop =
      jsondec_buftoint64(d, str.data, str_end, &parsed, NULL);

  if (stop != str_end) {
    jsondec_err(d, "Non-number characters in quoted integer");
  }
  return parsed;
}
646
647 /* Primitive value types ******************************************************/
648
649 /* Parse INT32 or INT64 value. */
jsondec_int(jsondec * d,const upb_FieldDef * f)650 static upb_MessageValue jsondec_int(jsondec* d, const upb_FieldDef* f) {
651 upb_MessageValue val;
652
653 switch (jsondec_peek(d)) {
654 case JD_NUMBER: {
655 double dbl = jsondec_number(d);
656 if (dbl > 9223372036854774784.0 || dbl < -9223372036854775808.0) {
657 jsondec_err(d, "JSON number is out of range.");
658 }
659 val.int64_val = dbl; /* must be guarded, overflow here is UB */
660 if (val.int64_val != dbl) {
661 jsondec_errf(d, "JSON number was not integral (%f != %" PRId64 ")", dbl,
662 val.int64_val);
663 }
664 break;
665 }
666 case JD_STRING: {
667 upb_StringView str = jsondec_string(d);
668 val.int64_val = jsondec_strtoint64(d, str);
669 break;
670 }
671 default:
672 jsondec_err(d, "Expected number or string");
673 }
674
675 if (upb_FieldDef_CType(f) == kUpb_CType_Int32 ||
676 upb_FieldDef_CType(f) == kUpb_CType_Enum) {
677 if (val.int64_val > INT32_MAX || val.int64_val < INT32_MIN) {
678 jsondec_err(d, "Integer out of range.");
679 }
680 val.int32_val = (int32_t)val.int64_val;
681 }
682
683 return val;
684 }
685
686 /* Parse UINT32 or UINT64 value. */
jsondec_uint(jsondec * d,const upb_FieldDef * f)687 static upb_MessageValue jsondec_uint(jsondec* d, const upb_FieldDef* f) {
688 upb_MessageValue val = {0};
689
690 switch (jsondec_peek(d)) {
691 case JD_NUMBER: {
692 double dbl = jsondec_number(d);
693 if (dbl > 18446744073709549568.0 || dbl < 0) {
694 jsondec_err(d, "JSON number is out of range.");
695 }
696 val.uint64_val = dbl; /* must be guarded, overflow here is UB */
697 if (val.uint64_val != dbl) {
698 jsondec_errf(d, "JSON number was not integral (%f != %" PRIu64 ")", dbl,
699 val.uint64_val);
700 }
701 break;
702 }
703 case JD_STRING: {
704 upb_StringView str = jsondec_string(d);
705 val.uint64_val = jsondec_strtouint64(d, str);
706 break;
707 }
708 default:
709 jsondec_err(d, "Expected number or string");
710 }
711
712 if (upb_FieldDef_CType(f) == kUpb_CType_UInt32) {
713 if (val.uint64_val > UINT32_MAX) {
714 jsondec_err(d, "Integer out of range.");
715 }
716 val.uint32_val = (uint32_t)val.uint64_val;
717 }
718
719 return val;
720 }
721
722 /* Parse DOUBLE or FLOAT value. */
jsondec_double(jsondec * d,const upb_FieldDef * f)723 static upb_MessageValue jsondec_double(jsondec* d, const upb_FieldDef* f) {
724 upb_StringView str;
725 upb_MessageValue val = {0};
726
727 switch (jsondec_peek(d)) {
728 case JD_NUMBER:
729 val.double_val = jsondec_number(d);
730 break;
731 case JD_STRING:
732 str = jsondec_string(d);
733 if (jsondec_streql(str, "NaN")) {
734 val.double_val = NAN;
735 } else if (jsondec_streql(str, "Infinity")) {
736 val.double_val = INFINITY;
737 } else if (jsondec_streql(str, "-Infinity")) {
738 val.double_val = -INFINITY;
739 } else {
740 val.double_val = strtod(str.data, NULL);
741 }
742 break;
743 default:
744 jsondec_err(d, "Expected number or string");
745 }
746
747 if (upb_FieldDef_CType(f) == kUpb_CType_Float) {
748 float f = val.double_val;
749 if (val.double_val != INFINITY && val.double_val != -INFINITY) {
750 if (f == INFINITY || f == -INFINITY) jsondec_err(d, "Float out of range");
751 }
752 val.float_val = f;
753 }
754
755 return val;
756 }
757
758 /* Parse STRING or BYTES value. */
jsondec_strfield(jsondec * d,const upb_FieldDef * f)759 static upb_MessageValue jsondec_strfield(jsondec* d, const upb_FieldDef* f) {
760 upb_MessageValue val;
761 val.str_val = jsondec_string(d);
762 if (upb_FieldDef_CType(f) == kUpb_CType_Bytes) {
763 val.str_val.size = jsondec_base64(d, val.str_val);
764 }
765 return val;
766 }
767
jsondec_enum(jsondec * d,const upb_FieldDef * f)768 static upb_MessageValue jsondec_enum(jsondec* d, const upb_FieldDef* f) {
769 switch (jsondec_peek(d)) {
770 case JD_STRING: {
771 upb_StringView str = jsondec_string(d);
772 const upb_EnumDef* e = upb_FieldDef_EnumSubDef(f);
773 const upb_EnumValueDef* ev =
774 upb_EnumDef_FindValueByNameWithSize(e, str.data, str.size);
775 upb_MessageValue val;
776 if (ev) {
777 val.int32_val = upb_EnumValueDef_Number(ev);
778 } else {
779 if (d->options & upb_JsonDecode_IgnoreUnknown) {
780 val.int32_val = 0;
781 } else {
782 jsondec_errf(d, "Unknown enumerator: '" UPB_STRINGVIEW_FORMAT "'",
783 UPB_STRINGVIEW_ARGS(str));
784 }
785 }
786 return val;
787 }
788 case JD_NULL: {
789 if (jsondec_isnullvalue(f)) {
790 upb_MessageValue val;
791 jsondec_null(d);
792 val.int32_val = 0;
793 return val;
794 }
795 }
796 /* Fallthrough. */
797 default:
798 return jsondec_int(d, f);
799 }
800 }
801
jsondec_bool(jsondec * d,const upb_FieldDef * f)802 static upb_MessageValue jsondec_bool(jsondec* d, const upb_FieldDef* f) {
803 bool is_map_key = upb_FieldDef_Number(f) == 1 &&
804 upb_MessageDef_IsMapEntry(upb_FieldDef_ContainingType(f));
805 upb_MessageValue val;
806
807 if (is_map_key) {
808 upb_StringView str = jsondec_string(d);
809 if (jsondec_streql(str, "true")) {
810 val.bool_val = true;
811 } else if (jsondec_streql(str, "false")) {
812 val.bool_val = false;
813 } else {
814 jsondec_err(d, "Invalid boolean map key");
815 }
816 } else {
817 switch (jsondec_peek(d)) {
818 case JD_TRUE:
819 val.bool_val = true;
820 jsondec_true(d);
821 break;
822 case JD_FALSE:
823 val.bool_val = false;
824 jsondec_false(d);
825 break;
826 default:
827 jsondec_err(d, "Expected true or false");
828 }
829 }
830
831 return val;
832 }
833
834 /* Composite types (array/message/map) ****************************************/
835
/* Parses a JSON array and appends each element to repeated field `f` of
 * `msg`. */
static void jsondec_array(jsondec* d, upb_Message* msg, const upb_FieldDef* f) {
  upb_Array* dest = upb_Message_Mutable(msg, f, d->arena).array;

  jsondec_arrstart(d);
  while (jsondec_arrnext(d)) {
    upb_Array_Append(dest, jsondec_value(d, f), d->arena);
  }
  jsondec_arrend(d);
}
846
/* Parses a JSON object into map field `f` of `msg`.  Keys and values are
 * decoded according to fields 1 and 2 of the map-entry message type. */
static void jsondec_map(jsondec* d, upb_Message* msg, const upb_FieldDef* f) {
  upb_Map* dest = upb_Message_Mutable(msg, f, d->arena).map;
  const upb_MessageDef* entry_def = upb_FieldDef_MessageSubDef(f);
  const upb_FieldDef* key_field =
      upb_MessageDef_FindFieldByNumber(entry_def, 1);
  const upb_FieldDef* value_field =
      upb_MessageDef_FindFieldByNumber(entry_def, 2);

  jsondec_objstart(d);
  while (jsondec_objnext(d)) {
    upb_MessageValue k;
    upb_MessageValue v;

    k = jsondec_value(d, key_field);
    jsondec_entrysep(d);
    v = jsondec_value(d, value_field);
    upb_Map_Set(dest, k, v, d->arena);
  }
  jsondec_objend(d);
}
863
/* Decodes a JSON value into `msg`: well-known types go through
 * jsondec_wellknown(), every other message type is parsed as a plain object. */
static void jsondec_tomsg(jsondec* d, upb_Message* msg,
                          const upb_MessageDef* m) {
  if (upb_MessageDef_WellKnownType(m) != kUpb_WellKnown_Unspecified) {
    jsondec_wellknown(d, msg, m);
    return;
  }
  jsondec_object(d, msg, m);
}
872
jsondec_msg(jsondec * d,const upb_FieldDef * f)873 static upb_MessageValue jsondec_msg(jsondec* d, const upb_FieldDef* f) {
874 const upb_MessageDef* m = upb_FieldDef_MessageSubDef(f);
875 const upb_MiniTable* layout = upb_MessageDef_MiniTable(m);
876 upb_Message* msg = upb_Message_New(layout, d->arena);
877 upb_MessageValue val;
878
879 jsondec_tomsg(d, msg, m);
880 val.msg_val = msg;
881 return val;
882 }
883
/* Decodes one `"name": value` member of a JSON object into `msg`.
 *
 * A name wrapped in [brackets] is looked up as an extension in the symbol
 * table; any other name is looked up by JSON name in `m`.  Unknown names are
 * an error unless upb_JsonDecode_IgnoreUnknown is set, in which case the value
 * is skipped.  JSON null leaves the field unset, except for Value/NullValue
 * fields. */
static void jsondec_field(jsondec* d, upb_Message* msg,
                          const upb_MessageDef* m) {
  const upb_FieldDef* f;
  const upb_FieldDef* outer_field;
  upb_StringView name = jsondec_string(d);
  bool bracketed;

  jsondec_entrysep(d);

  bracketed = name.size >= 2 && name.data[0] == '[' &&
              name.data[name.size - 1] == ']';

  if (bracketed) {
    f = upb_DefPool_FindExtensionByNameWithSize(d->symtab, name.data + 1,
                                                name.size - 2);
    if (f && upb_FieldDef_ContainingType(f) != m) {
      jsondec_errf(
          d, "Extension %s extends message %s, but was seen in message %s",
          upb_FieldDef_FullName(f),
          upb_MessageDef_FullName(upb_FieldDef_ContainingType(f)),
          upb_MessageDef_FullName(m));
    }
  } else {
    f = upb_MessageDef_FindByJsonNameWithSize(m, name.data, name.size);
  }

  if (f == NULL) {
    if ((d->options & upb_JsonDecode_IgnoreUnknown) == 0) {
      jsondec_errf(d, "No such field: " UPB_STRINGVIEW_FORMAT,
                   UPB_STRINGVIEW_ARGS(name));
    }
    jsondec_skipval(d);
    return;
  }

  if (jsondec_peek(d) == JD_NULL && !jsondec_isvalue(f)) {
    /* JSON "null" indicates a default value, so no need to set anything. */
    jsondec_null(d);
    return;
  }

  if (upb_FieldDef_RealContainingOneof(f) &&
      upb_Message_WhichOneof(msg, upb_FieldDef_ContainingOneof(f))) {
    jsondec_err(d, "More than one field for this oneof.");
  }

  /* Track the field being decoded; restored once its value is parsed. */
  outer_field = d->debug_field;
  d->debug_field = f;

  if (upb_FieldDef_IsMap(f)) {
    jsondec_map(d, msg, f);
  } else if (upb_FieldDef_IsRepeated(f)) {
    jsondec_array(d, msg, f);
  } else if (upb_FieldDef_IsSubMessage(f)) {
    upb_Message* sub = upb_Message_Mutable(msg, f, d->arena).msg;
    const upb_MessageDef* sub_def = upb_FieldDef_MessageSubDef(f);
    jsondec_tomsg(d, sub, sub_def);
  } else {
    upb_MessageValue scalar = jsondec_value(d, f);
    upb_Message_SetFieldByDef(msg, f, scalar, d->arena);
  }

  d->debug_field = outer_field;
}
946
/* Parses a JSON object, decoding every member into `msg` through
 * jsondec_field(). */
static void jsondec_object(jsondec* d, upb_Message* msg,
                           const upb_MessageDef* m) {
  jsondec_objstart(d);
  for (;;) {
    if (!jsondec_objnext(d)) break;
    jsondec_field(d, msg, m);
  }
  jsondec_objend(d);
}
955
jsondec_value(jsondec * d,const upb_FieldDef * f)956 static upb_MessageValue jsondec_value(jsondec* d, const upb_FieldDef* f) {
957 switch (upb_FieldDef_CType(f)) {
958 case kUpb_CType_Bool:
959 return jsondec_bool(d, f);
960 case kUpb_CType_Float:
961 case kUpb_CType_Double:
962 return jsondec_double(d, f);
963 case kUpb_CType_UInt32:
964 case kUpb_CType_UInt64:
965 return jsondec_uint(d, f);
966 case kUpb_CType_Int32:
967 case kUpb_CType_Int64:
968 return jsondec_int(d, f);
969 case kUpb_CType_String:
970 case kUpb_CType_Bytes:
971 return jsondec_strfield(d, f);
972 case kUpb_CType_Enum:
973 return jsondec_enum(d, f);
974 case kUpb_CType_Message:
975 return jsondec_msg(d, f);
976 default:
977 UPB_UNREACHABLE();
978 }
979 }
980
981 /* Well-known types ***********************************************************/
982
/* Reads exactly `digits` decimal digits at *ptr, followed by the literal
 * `after` when it is non-NULL, and advances *ptr past both.  Returns the
 * numeric value; anything else is a "Malformed timestamp" error.  The caller
 * must guarantee that enough bytes are available. */
static int jsondec_tsdigits(jsondec* d, const char** ptr, size_t digits,
                            const char* after) {
  uint64_t val;
  const char* const begin = *ptr;
  const char* const digits_end = begin + digits;
  const size_t after_len = after ? strlen(after) : 0;
  bool ok;

  UPB_ASSERT(digits <= 9); /* int can't overflow. */

  ok = jsondec_buftouint64(d, begin, digits_end, &val) == digits_end;
  if (ok && after_len) {
    ok = memcmp(digits_end, after, after_len) == 0;
  }
  if (!ok) {
    jsondec_err(d, "Malformed timestamp");
  }

  UPB_ASSERT(val < INT_MAX);

  *ptr = digits_end + after_len;
  return (int)val;
}
1002
/* Parses an optional fractional-seconds suffix (".ddd", at most nine digits)
 * at *ptr and returns it scaled to nanoseconds, advancing *ptr past it.
 * Returns 0 and leaves *ptr untouched when no '.' is present. */
static int jsondec_nanos(jsondec* d, const char** ptr, const char* end) {
  uint64_t nanos = 0;
  const char* const dot = *ptr;

  if (dot != end && *dot == '.') {
    const char* frac_end = jsondec_buftouint64(d, dot + 1, end, &nanos);
    const int digits = (int)(frac_end - dot - 1);
    int i;

    if (digits > 9) {
      jsondec_err(d, "Too many digits for partial seconds");
    }
    /* Scale the fraction up to nine digits, i.e. nanoseconds. */
    for (i = digits; i < 9; i++) {
      nanos *= 10;
    }
    *ptr = frac_end;
  }

  UPB_ASSERT(nanos < INT_MAX);

  return (int)nanos;
}
1022
/* Returns the number of days between 1970-01-01 and the given calendar date
 * (negative for earlier dates): jsondec_epochdays(1970, 1, 1) == 0.
 *
 * Months are renumbered so the year starts in March, which puts the leap day
 * at the end of the shifted year.  All arithmetic is done in uint32_t. */
int jsondec_epochdays(int y, int m, int d) {
  const uint32_t year_base = 4800; /* Before min year, multiple of 400. */
  uint32_t month = m - 3;          /* March-based month. */
  uint32_t year = y + year_base;
  uint32_t month_days;
  uint32_t leap_days;
  uint32_t days;

  /* m - 3 wrapped around: January/February belong to the previous year. */
  if (month > (uint32_t)m) {
    month += 12;
    year -= 1;
  }

  month_days = (month * 62719 + 769) / 2048;
  leap_days = year / 4 - year / 100 + year / 400;

  days = year * 365 + leap_days + month_days;
  days += (d - 1);
  days -= 2472632;
  return days;
}
1034
/* Returns the number of seconds between 1970-01-01T00:00:00 and the given
 * calendar date and time of day. No time-zone adjustment is applied. */
static int64_t jsondec_unixtime(int y, int m, int d, int h, int min, int s) {
  int64_t total = (int64_t)jsondec_epochdays(y, m, d) * 86400;
  total += h * 3600;
  total += min * 60;
  total += s;
  return total;
}
1038
/* Decodes a JSON string of the form
 *   "YYYY-MM-DDTHH:MM:SS[.fraction](Z|+HH:MM|-HH:MM)"
 * into `msg`: field number 1 receives the seconds since the Unix epoch (int64)
 * and field number 2 the nanoseconds (int32).
 *
 * "Malformed timestamp" is reported through jsondec_err() when the text does
 * not match the format, and "Timestamp out of range" when the resulting
 * instant lies outside 0001-01-01T00:00:00Z .. 9999-12-31T23:59:59Z. */
static void jsondec_timestamp(jsondec* d, upb_Message* msg,
                              const upb_MessageDef* m) {
  /* Inclusive bounds on the seconds field. */
  const int64_t min_seconds = -62135596800; /* 0001-01-01T00:00:00Z */
  const int64_t max_seconds = 253402300799; /* 9999-12-31T23:59:59Z */
  upb_MessageValue seconds;
  upb_MessageValue nanos;
  upb_StringView str = jsondec_string(d);
  const char* ptr = str.data;
  const char* end = ptr + str.size;

  /* The shortest valid input is "YYYY-MM-DDTHH:MM:SSZ" (20 bytes). This also
   * guarantees that the 19 bytes of fixed-width reads below stay inside the
   * string. */
  if (str.size < 20) goto malformed;

  {
    /* 1972-01-01T01:00:00 */
    int year = jsondec_tsdigits(d, &ptr, 4, "-");
    int mon = jsondec_tsdigits(d, &ptr, 2, "-");
    int day = jsondec_tsdigits(d, &ptr, 2, "T");
    int hour = jsondec_tsdigits(d, &ptr, 2, ":");
    int min = jsondec_tsdigits(d, &ptr, 2, ":");
    int sec = jsondec_tsdigits(d, &ptr, 2, NULL);

    seconds.int64_val = jsondec_unixtime(year, mon, day, hour, min, sec);
  }

  nanos.int32_val = jsondec_nanos(d, &ptr, end);

  {
    /* [+-]08:00 or Z */
    int ofs_hour = 0;
    int ofs_min = 0;
    bool neg = false;

    if (ptr == end) goto malformed;

    switch (*ptr++) {
      case '-':
        neg = true;
        /* fallthrough */
      case '+':
        /* Exactly "HH:MM" must remain after the sign. */
        if ((end - ptr) != 5) goto malformed;
        ofs_hour = jsondec_tsdigits(d, &ptr, 2, ":");
        ofs_min = jsondec_tsdigits(d, &ptr, 2, NULL);
        /* From here on ofs_min holds the whole offset in seconds. */
        ofs_min = ((ofs_hour * 60) + ofs_min) * 60;
        /* Local time = UTC + offset, so converting to UTC subtracts a positive
         * offset and adds a negative one. */
        seconds.int64_val += (neg ? ofs_min : -ofs_min);
        break;
      case 'Z':
        if (ptr != end) goto malformed;
        break;
      default:
        goto malformed;
    }
  }

  /* Checked after the offset is applied: a four-digit year keeps the local
   * date within range, but the UTC offset can push the instant past either
   * end. */
  if (seconds.int64_val < min_seconds || seconds.int64_val > max_seconds) {
    jsondec_err(d, "Timestamp out of range");
  }

  upb_Message_SetFieldByDef(msg, upb_MessageDef_FindFieldByNumber(m, 1),
                            seconds, d->arena);
  upb_Message_SetFieldByDef(msg, upb_MessageDef_FindFieldByNumber(m, 2), nanos,
                            d->arena);
  return;

malformed:
  jsondec_err(d, "Malformed timestamp");
}
1103
/* Decodes a JSON string such as "3.000000001s" or "3s" into `msg`: field
 * number 1 receives the whole seconds (int64) and field number 2 the
 * nanoseconds (int32), negated when the parser flagged the input as negative.
 *
 * "Malformed duration" is reported through jsondec_err() unless the number is
 * followed by exactly one 's'; "Duration out of range" when the seconds exceed
 * +/- 3652500 days' worth of seconds. */
static void jsondec_duration(jsondec* d, upb_Message* msg,
                             const upb_MessageDef* m) {
  const int64_t limit = (uint64_t)3652500 * 86400;
  upb_StringView text = jsondec_string(d);
  const char* cur = text.data;
  const char* const stop = cur + text.size;
  bool negative = false;
  upb_MessageValue secs;
  upb_MessageValue frac;

  cur = jsondec_buftoint64(d, cur, stop, &secs.int64_val, &negative);
  frac.int32_val = jsondec_nanos(d, &cur, stop);

  /* The only thing allowed after the number is the unit suffix. */
  if (!(stop - cur == 1 && *cur == 's')) {
    jsondec_err(d, "Malformed duration");
  }

  if (secs.int64_val > limit || secs.int64_val < -limit) {
    jsondec_err(d, "Duration out of range");
  }

  /* jsondec_nanos() only yields a magnitude; give it the sign of the input. */
  if (negative) {
    frac.int32_val = -frac.int32_val;
  }

  upb_Message_SetFieldByDef(msg, upb_MessageDef_FindFieldByNumber(m, 1), secs,
                            d->arena);
  upb_Message_SetFieldByDef(msg, upb_MessageDef_FindFieldByNumber(m, 2), frac,
                            d->arena);
}
1135
/* Decodes a JSON array into `msg`. Each element becomes a new sub-message of
 * the type of field number 1, is appended to that field's array and is then
 * filled in by jsondec_wellknownvalue(). */
static void jsondec_listvalue(jsondec* d, upb_Message* msg,
                              const upb_MessageDef* m) {
  const upb_FieldDef* elems_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_MessageDef* elem_m = upb_FieldDef_MessageSubDef(elems_f);
  const upb_MiniTable* elem_layout = upb_MessageDef_MiniTable(elem_m);
  upb_Array* arr = upb_Message_Mutable(msg, elems_f, d->arena).array;

  for (jsondec_arrstart(d); jsondec_arrnext(d);) {
    upb_Message* elem_msg = upb_Message_New(elem_layout, d->arena);
    upb_MessageValue elem;

    /* The element is appended first and populated afterwards. */
    elem.msg_val = elem_msg;
    upb_Array_Append(arr, elem, d->arena);
    jsondec_wellknownvalue(d, elem_msg, elem_m);
  }
  jsondec_arrend(d);
}
1153
/* Decodes a JSON object into `msg`. Field number 1 of `m` is treated as a map
 * whose entry's field number 2 is a sub-message; every JSON member becomes one
 * map entry keyed by the member name, with a freshly created value message
 * that is filled in by jsondec_wellknownvalue(). */
static void jsondec_struct(jsondec* d, upb_Message* msg,
                           const upb_MessageDef* m) {
  const upb_FieldDef* map_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_MessageDef* map_entry_m = upb_FieldDef_MessageSubDef(map_f);
  const upb_FieldDef* entry_value_f =
      upb_MessageDef_FindFieldByNumber(map_entry_m, 2);
  const upb_MessageDef* val_m = upb_FieldDef_MessageSubDef(entry_value_f);
  const upb_MiniTable* val_layout = upb_MessageDef_MiniTable(val_m);
  upb_Map* map = upb_Message_Mutable(msg, map_f, d->arena).map;

  for (jsondec_objstart(d); jsondec_objnext(d);) {
    upb_Message* member = upb_Message_New(val_layout, d->arena);
    upb_MessageValue k;
    upb_MessageValue v;

    k.str_val = jsondec_string(d);
    v.msg_val = member;
    /* The entry is inserted before its value has been parsed. */
    upb_Map_Set(map, k, v, d->arena);
    jsondec_entrysep(d);
    jsondec_wellknownvalue(d, member, val_m);
  }
  jsondec_objend(d);
}
1175
/* Decodes an arbitrary JSON value into `msg`, choosing the destination field
 * by the kind of the next token:
 *   null -> 1, number -> 2, string -> 3, true/false -> 4,
 *   object -> 5 (via jsondec_struct), array -> 6 (via jsondec_listvalue). */
static void jsondec_wellknownvalue(jsondec* d, upb_Message* msg,
                                   const upb_MessageDef* m) {
  upb_MessageValue scalar;
  int field_number;

  switch (jsondec_peek(d)) {
    /* Composite values are decoded straight into the mutable sub-message, so
     * there is nothing left to set afterwards. */
    case JD_OBJECT: {
      /* Struct struct_value = 5; */
      const upb_FieldDef* struct_f = upb_MessageDef_FindFieldByNumber(m, 5);
      upb_Message* sub = upb_Message_Mutable(msg, struct_f, d->arena).msg;
      jsondec_struct(d, sub, upb_FieldDef_MessageSubDef(struct_f));
      return;
    }
    case JD_ARRAY: {
      /* ListValue list_value = 6; */
      const upb_FieldDef* list_f = upb_MessageDef_FindFieldByNumber(m, 6);
      upb_Message* sub = upb_Message_Mutable(msg, list_f, d->arena).msg;
      jsondec_listvalue(d, sub, upb_FieldDef_MessageSubDef(list_f));
      return;
    }
    case JD_NULL:
      /* NullValue null_value = 1; */
      field_number = 1;
      scalar.int32_val = 0;
      jsondec_null(d);
      break;
    case JD_NUMBER:
      /* double number_value = 2; */
      field_number = 2;
      scalar.double_val = jsondec_number(d);
      break;
    case JD_STRING:
      /* string string_value = 3; */
      field_number = 3;
      scalar.str_val = jsondec_string(d);
      break;
    case JD_TRUE:
      /* bool bool_value = 4; */
      field_number = 4;
      scalar.bool_val = true;
      jsondec_true(d);
      break;
    case JD_FALSE:
      /* bool bool_value = 4; */
      field_number = 4;
      scalar.bool_val = false;
      jsondec_false(d);
      break;
    default:
      UPB_UNREACHABLE();
  }

  upb_Message_SetFieldByDef(
      msg, upb_MessageDef_FindFieldByNumber(m, field_number), scalar, d->arena);
}
1230
jsondec_mask(jsondec * d,const char * buf,const char * end)1231 static upb_StringView jsondec_mask(jsondec* d, const char* buf,
1232 const char* end) {
1233 /* FieldMask fields grow due to inserted '_' characters, so we can't do the
1234 * transform in place. */
1235 const char* ptr = buf;
1236 upb_StringView ret;
1237 char* out;
1238
1239 ret.size = end - ptr;
1240 while (ptr < end) {
1241 ret.size += (*ptr >= 'A' && *ptr <= 'Z');
1242 ptr++;
1243 }
1244
1245 out = upb_Arena_Malloc(d->arena, ret.size);
1246 ptr = buf;
1247 ret.data = out;
1248
1249 while (ptr < end) {
1250 char ch = *ptr++;
1251 if (ch >= 'A' && ch <= 'Z') {
1252 *out++ = '_';
1253 *out++ = ch + 32;
1254 } else if (ch == '_') {
1255 jsondec_err(d, "field mask may not contain '_'");
1256 } else {
1257 *out++ = ch;
1258 }
1259 }
1260
1261 return ret;
1262 }
1263
/* Decodes a JSON string holding comma-separated paths into the array of field
 * number 1 of `msg`. Each path is converted with jsondec_mask() before it is
 * appended. A trailing comma does not produce an extra empty path. */
static void jsondec_fieldmask(jsondec* d, upb_Message* msg,
                              const upb_MessageDef* m) {
  /* repeated string paths = 1; */
  const upb_FieldDef* paths_f = upb_MessageDef_FindFieldByNumber(m, 1);
  upb_Array* paths = upb_Message_Mutable(msg, paths_f, d->arena).array;
  upb_StringView text = jsondec_string(d);
  const char* cur = text.data;
  const char* const stop = cur + text.size;

  while (cur < stop) {
    const char* comma = memchr(cur, ',', stop - cur);
    /* The last path has no comma and runs to the end of the string. */
    const char* path_end = comma ? comma : stop;
    upb_MessageValue path;

    path.str_val = jsondec_mask(d, cur, path_end);
    cur = comma ? comma + 1 : stop;
    upb_Array_Append(paths, path, d->arena);
  }
}
1286
/* Decodes one member of an Any object (other than "@type") into `msg`, the
 * message of the type named by the type URL.
 *
 * For regular types: {"@type": "[user type]", "f1": <V1>, "f2": <V2>}
 * where f1, f2, etc. are the normal fields of this type.
 *
 * For well-known types: {"@type": "[well-known type]", "value": <X>}
 * where <X> is whatever encoding the WKT normally uses. */
static void jsondec_anyfield(jsondec* d, upb_Message* msg,
                             const upb_MessageDef* m) {
  upb_StringView key;

  if (upb_MessageDef_WellKnownType(m) == kUpb_WellKnown_Unspecified) {
    jsondec_field(d, msg, m);
    return;
  }

  /* Well-known type: the only member accepted is "value". */
  key = jsondec_string(d);
  jsondec_entrysep(d);
  if (!jsondec_streql(key, "value")) {
    jsondec_err(d, "Key for well-known type must be 'value'");
  }
  jsondec_wellknown(d, msg, m);
}
1304
/* Reads the "@type" string, stores it in field number 1 of `msg` and returns
 * the message definition named by the part after the last '/', looked up in
 * d->symtab.
 *
 * Errors are reported through jsondec_err() when the URL has no '/', when the
 * only '/' is its first character, or when the type name is unknown. */
static const upb_MessageDef* jsondec_typeurl(jsondec* d, upb_Message* msg,
                                             const upb_MessageDef* m) {
  const upb_FieldDef* url_f = upb_MessageDef_FindFieldByNumber(m, 1);
  upb_StringView url = jsondec_string(d);
  const char* const begin = url.data;
  const char* const end = begin + url.size;
  const char* slash = end;
  const char* name;
  const upb_MessageDef* found;
  upb_MessageValue url_val;

  url_val.str_val = url;
  upb_Message_SetFieldByDef(msg, url_f, url_val, d->arena);

  /* Walk backwards to the last '/'. */
  while (slash > begin) {
    slash--;
    if (*slash == '/') break;
  }

  /* slash == begin: no '/' at all, or nothing in front of it.
   * slash == end:   the URL is empty. */
  if (slash == begin || slash == end) {
    jsondec_err(d, "Type url must have at least one '/' and non-empty host");
  }

  name = slash + 1;
  found = upb_DefPool_FindMessageByNameWithSize(d->symtab, name, end - name);

  if (found == NULL) {
    jsondec_err(d, "Type was not found");
  }

  return found;
}
1334
/* Decodes a JSON object into an Any message `msg`:
 *   string type_url = 1;
 *   bytes value = 2;
 *
 * The "@type" member selects the payload message type. All other members are
 * decoded into a temporary message of that type, which is then serialized
 * with upb_Encode() and stored in the `value` field.
 *
 * Errors (missing "@type", allocation failure, encode failure, and anything
 * raised by the nested parsers) are reported through jsondec_err(). */
static void jsondec_any(jsondec* d, upb_Message* msg, const upb_MessageDef* m) {
  const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumber(m, 2);
  upb_Message* any_msg;
  const upb_MessageDef* any_m = NULL;
  const char* pre_type_data = NULL;
  const char* pre_type_end = NULL;
  upb_MessageValue encoded;

  jsondec_objstart(d);

  /* Scan looking for "@type", which is not necessarily first. Members seen
   * before it cannot be decoded yet (the type is unknown), so they are skipped
   * and their extent in the input is remembered. */
  while (!any_m && jsondec_objnext(d)) {
    const char* start = d->ptr;
    upb_StringView name = jsondec_string(d);
    jsondec_entrysep(d);
    if (jsondec_streql(name, "@type")) {
      any_m = jsondec_typeurl(d, msg, m);
      if (pre_type_data) {
        /* Back up from the "@type" key to the ',' that separates it from the
         * previous member. */
        pre_type_end = start;
        while (*pre_type_end != ',') pre_type_end--;
      }
    } else {
      if (!pre_type_data) pre_type_data = start;
      jsondec_skipval(d);
    }
  }

  if (!any_m) {
    jsondec_err(d, "Any object didn't contain a '@type' field");
  }

  const upb_MiniTable* any_layout = upb_MessageDef_MiniTable(any_m);
  any_msg = upb_Message_New(any_layout, d->arena);

  if (pre_type_data) {
    /* Re-parse the skipped members from a private copy in which the ',' before
     * "@type" is replaced by '}', so the copy reads as the tail of an
     * object. */
    size_t len = pre_type_end - pre_type_data + 1;
    char* tmp = upb_Arena_Malloc(d->arena, len);
    const char* saved_ptr = d->ptr;
    const char* saved_end = d->end;
    if (!tmp) {
      jsondec_err(d, "Out of memory");
    }
    memcpy(tmp, pre_type_data, len - 1);
    tmp[len - 1] = '}';
    d->ptr = tmp;
    d->end = tmp + len;
    d->is_first = true;
    while (jsondec_objnext(d)) {
      jsondec_anyfield(d, any_msg, any_m);
    }
    d->ptr = saved_ptr;
    d->end = saved_end;
  }

  /* Members that follow "@type" in the original input. */
  while (jsondec_objnext(d)) {
    jsondec_anyfield(d, any_msg, any_m);
  }

  jsondec_objend(d);

  upb_EncodeStatus status =
      upb_Encode(any_msg, upb_MessageDef_MiniTable(any_m), 0, d->arena,
                 (char**)&encoded.str_val.data, &encoded.str_val.size);
  /* Fail the decode instead of asserting: with assertions compiled out, a
   * failed encode would otherwise store an unset `encoded` value. */
  if (status != kUpb_EncodeStatus_Ok) {
    jsondec_err(d, "Error encoding Any value");
  }
  upb_Message_SetFieldByDef(msg, value_f, encoded, d->arena);
}
1401
/* Decodes a wrapper message: the JSON value is parsed with jsondec_value()
 * according to field number 1 of `m` and stored in that field of `msg`. */
static void jsondec_wrapper(jsondec* d, upb_Message* msg,
                            const upb_MessageDef* m) {
  const upb_FieldDef* inner_f = upb_MessageDef_FindFieldByNumber(m, 1);
  upb_MessageValue inner = jsondec_value(d, inner_f);

  upb_Message_SetFieldByDef(msg, inner_f, inner, d->arena);
}
1408
/* Dispatches to the decoder that matches the well-known type of `m`. Must
 * only be called for message definitions whose well-known type is one of the
 * cases below. */
static void jsondec_wellknown(jsondec* d, upb_Message* msg,
                              const upb_MessageDef* m) {
  switch (upb_MessageDef_WellKnownType(m)) {
    /* All wrapper types share one decoder. */
    case kUpb_WellKnown_BoolValue:
    case kUpb_WellKnown_BytesValue:
    case kUpb_WellKnown_StringValue:
    case kUpb_WellKnown_UInt32Value:
    case kUpb_WellKnown_Int32Value:
    case kUpb_WellKnown_UInt64Value:
    case kUpb_WellKnown_Int64Value:
    case kUpb_WellKnown_FloatValue:
    case kUpb_WellKnown_DoubleValue:
      jsondec_wrapper(d, msg, m);
      return;
    case kUpb_WellKnown_Struct:
      jsondec_struct(d, msg, m);
      return;
    case kUpb_WellKnown_ListValue:
      jsondec_listvalue(d, msg, m);
      return;
    case kUpb_WellKnown_Value:
      jsondec_wellknownvalue(d, msg, m);
      return;
    case kUpb_WellKnown_Timestamp:
      jsondec_timestamp(d, msg, m);
      return;
    case kUpb_WellKnown_Duration:
      jsondec_duration(d, msg, m);
      return;
    case kUpb_WellKnown_FieldMask:
      jsondec_fieldmask(d, msg, m);
      return;
    case kUpb_WellKnown_Any:
      jsondec_any(d, msg, m);
      return;
    default:
      UPB_UNREACHABLE();
  }
}
1448
/* Runs the decode with d->err armed as the jump target. Returns true when
 * jsondec_tomsg() completes, false when control comes back through
 * UPB_SETJMP with a non-zero value. */
static bool upb_JsonDecoder_Decode(jsondec* const d, upb_Message* const msg,
                                   const upb_MessageDef* const m) {
  if (UPB_SETJMP(d->err) == 0) {
    jsondec_tomsg(d, msg, m);
    return true;
  }
  return false;
}
1456
/* Decodes `size` bytes of JSON text at `buf` into `msg`, which is described by
 * `m`. `symtab`, `options`, `arena` and `status` are stored in the decoder
 * state for use during parsing.
 *
 * Returns true on success and false when decoding fails. Empty input
 * (size == 0) succeeds immediately and leaves `msg` untouched. */
bool upb_JsonDecode(const char* buf, size_t size, upb_Message* msg,
                    const upb_MessageDef* m, const upb_DefPool* symtab,
                    int options, upb_Arena* arena, upb_Status* status) {
  if (size == 0) return true;

  jsondec d = {
      .ptr = buf,
      .end = buf + size,
      .arena = arena,
      .symtab = symtab,
      .status = status,
      .options = options,
      .depth = 64,
      .line = 1,
      .line_begin = buf,
      .debug_field = NULL,
      .is_first = false,
  };

  return upb_JsonDecoder_Decode(&d, msg, m);
}
1478