1 /*
2  * Copyright (c) 2009-2021, Google LLC
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *     * Redistributions of source code must retain the above copyright
8  *       notice, this list of conditions and the following disclaimer.
9  *     * Redistributions in binary form must reproduce the above copyright
10  *       notice, this list of conditions and the following disclaimer in the
11  *       documentation and/or other materials provided with the distribution.
12  *     * Neither the name of Google LLC nor the
13  *       names of its contributors may be used to endorse or promote products
14  *       derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
20  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "upb/wire/decode.h"
29 
30 #include <string.h>
31 
32 #include "upb/base/descriptor_constants.h"
33 #include "upb/collections/array_internal.h"
34 #include "upb/collections/map_internal.h"
35 #include "upb/mem/arena_internal.h"
36 #include "upb/mini_table/common.h"
37 #include "upb/mini_table/enum_internal.h"
38 #include "upb/port/atomic.h"
39 #include "upb/wire/common.h"
40 #include "upb/wire/common_internal.h"
41 #include "upb/wire/decode_internal.h"
42 #include "upb/wire/encode.h"
43 #include "upb/wire/eps_copy_input_stream.h"
44 #include "upb/wire/reader.h"
45 #include "upb/wire/swap_internal.h"
46 #include "upb/wire/types.h"
47 
48 // Must be last.
49 #include "upb/port/def.inc"
50 
// Synthetic "field type" values used by the decoder's tables in addition to
// the real descriptor types.
enum {
  kUpb_FakeFieldType_MessageSetItem = 19,
  kUpb_FakeFieldType_FieldNotFound = 0,
};
56 
// DecodeOp: the action the decoder performs for one wire-type/field-type
// combination.  Members are listed in numeric order.
enum {
  // Special ops (negative values): nothing is written to a regular field.
  kUpb_DecodeOp_MessageSetItem = -2,
  kUpb_DecodeOp_UnknownField = -1,

  // Scalar-only ops.  For the ScalarNByte ops the value doubles as lg2 of the
  // element size: the array code shifts by the op itself.
  kUpb_DecodeOp_Scalar1Byte = 0,
  kUpb_DecodeOp_Enum = 1,
  kUpb_DecodeOp_Scalar4Byte = 2,
  kUpb_DecodeOp_Scalar8Byte = 3,

  // Ops valid for both scalar and repeated fields.
  kUpb_DecodeOp_String = 4,
  kUpb_DecodeOp_Bytes = 5,
  kUpb_DecodeOp_SubMessage = 6,

  // Repeated-only ops.  The values in between (7..12) are produced by the
  // OP_FIXPCK_LG2 / OP_VARPCK_LG2 macros.
  kUpb_DecodeOp_PackedEnum = 13,
};
77 
// For packed fields it is helpful to be able to recover the lg2 of the data
// size from the op: subtracting OP_FIXPCK_LG2(0) / OP_VARPCK_LG2(0) from the
// op gives lg2 back.  The argument is parenthesized so that an expression
// argument containing a low-precedence operator still expands correctly.
#define OP_FIXPCK_LG2(n) ((n) + 5) /* n in [2, 3] => op in [7, 8] */
#define OP_VARPCK_LG2(n) ((n) + 9) /* n in [0, 2, 3] => op in [9, 11, 12] */
82 
// Scratch storage for one value read off the wire.  All members overlap; the
// decoder reads a raw value into one member and later stages copy out the
// first 1, 4 or 8 bytes (or use `size` as a byte length).
typedef union {
  bool bool_val;        // Normalized boolean (written for bool fields).
  uint32_t uint32_val;  // 32-bit payload.
  uint64_t uint64_val;  // 64-bit payload; raw varints are decoded into this.
  uint32_t size;        // Byte length of a length-delimited payload.
} wireval;
89 
90 static const char* _upb_Decoder_DecodeMessage(upb_Decoder* d, const char* ptr,
91                                               upb_Message* msg,
92                                               const upb_MiniTable* layout);
93 
_upb_Decoder_ErrorJmp(upb_Decoder * d,upb_DecodeStatus status)94 UPB_NORETURN static void* _upb_Decoder_ErrorJmp(upb_Decoder* d,
95                                                 upb_DecodeStatus status) {
96   assert(status != kUpb_DecodeStatus_Ok);
97   d->status = status;
98   UPB_LONGJMP(d->err, 1);
99 }
100 
// Non-static variant of the error jump that takes the status as a plain int.
// Stores the status on the decoder and jumps via UPB_LONGJMP on d->err.
const char* _upb_FastDecoder_ErrorJmp(upb_Decoder* d, int status) {
  assert(kUpb_DecodeStatus_Ok != status);
  d->status = status;
  UPB_LONGJMP(d->err, 1);
  // Only here to satisfy the declared return type.
  return NULL;
}
107 
// Checks `len` bytes at `buf` with _upb_Decoder_VerifyUtf8Inline() and aborts
// decoding with kUpb_DecodeStatus_BadUtf8 when that check fails.
static void _upb_Decoder_VerifyUtf8(upb_Decoder* d, const char* buf, int len) {
  if (_upb_Decoder_VerifyUtf8Inline(buf, len)) return;
  _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8);
}
113 
// Makes sure `arr` has room for `elem` more elements, growing it from the
// decoder's arena when the spare capacity is too small.  Aborts decoding with
// kUpb_DecodeStatus_OutOfMemory if the reallocation fails.
//
// Returns true when a reallocation happened, so callers know that pointers
// into the array's storage must be recomputed.
static bool _upb_Decoder_Reserve(upb_Decoder* d, upb_Array* arr, size_t elem) {
  if (arr->capacity - arr->size >= elem) return false;

  if (!_upb_array_realloc(arr, arr->size + elem, &d->arena)) {
    _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
  }
  return true;
}
121 
// Result of decoding a multi-byte varint.
typedef struct {
  const char* ptr;  // First byte after the varint, or NULL on failure.
  uint64_t val;     // Decoded value (only meaningful when ptr != NULL).
} _upb_DecodeLongVarintReturn;
126 
127 UPB_NOINLINE
_upb_Decoder_DecodeLongVarint(const char * ptr,uint64_t val)128 static _upb_DecodeLongVarintReturn _upb_Decoder_DecodeLongVarint(
129     const char* ptr, uint64_t val) {
130   _upb_DecodeLongVarintReturn ret = {NULL, 0};
131   uint64_t byte;
132   int i;
133   for (i = 1; i < 10; i++) {
134     byte = (uint8_t)ptr[i];
135     val += (byte - 1) << (i * 7);
136     if (!(byte & 0x80)) {
137       ret.ptr = ptr + i + 1;
138       ret.val = val;
139       return ret;
140     }
141   }
142   return ret;
143 }
144 
145 UPB_FORCEINLINE
_upb_Decoder_DecodeVarint(upb_Decoder * d,const char * ptr,uint64_t * val)146 static const char* _upb_Decoder_DecodeVarint(upb_Decoder* d, const char* ptr,
147                                              uint64_t* val) {
148   uint64_t byte = (uint8_t)*ptr;
149   if (UPB_LIKELY((byte & 0x80) == 0)) {
150     *val = byte;
151     return ptr + 1;
152   } else {
153     _upb_DecodeLongVarintReturn res = _upb_Decoder_DecodeLongVarint(ptr, byte);
154     if (!res.ptr) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
155     *val = res.val;
156     return res.ptr;
157   }
158 }
159 
160 UPB_FORCEINLINE
_upb_Decoder_DecodeTag(upb_Decoder * d,const char * ptr,uint32_t * val)161 static const char* _upb_Decoder_DecodeTag(upb_Decoder* d, const char* ptr,
162                                           uint32_t* val) {
163   uint64_t byte = (uint8_t)*ptr;
164   if (UPB_LIKELY((byte & 0x80) == 0)) {
165     *val = byte;
166     return ptr + 1;
167   } else {
168     const char* start = ptr;
169     _upb_DecodeLongVarintReturn res = _upb_Decoder_DecodeLongVarint(ptr, byte);
170     if (!res.ptr || res.ptr - start > 5 || res.val > UINT32_MAX) {
171       _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
172     }
173     *val = res.val;
174     return res.ptr;
175   }
176 }
177 
178 UPB_FORCEINLINE
upb_Decoder_DecodeSize(upb_Decoder * d,const char * ptr,uint32_t * size)179 static const char* upb_Decoder_DecodeSize(upb_Decoder* d, const char* ptr,
180                                           uint32_t* size) {
181   uint64_t size64;
182   ptr = _upb_Decoder_DecodeVarint(d, ptr, &size64);
183   if (size64 >= INT32_MAX ||
184       !upb_EpsCopyInputStream_CheckSize(&d->input, ptr, (int)size64)) {
185     _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
186   }
187   *size = size64;
188   return ptr;
189 }
190 
// Prepares a varint-decoded value for storage as a 32-bit field.  The next
// stage does memcpy(dst, &val, 4), so on hosts that are not little-endian
// the low 32 bits must first be moved into uint32_val.  On little-endian
// hosts nothing is done.
static void _upb_Decoder_MungeInt32(wireval* val) {
  if (_upb_IsLittleEndian()) return;
  val->uint32_val = val->uint64_val;
}
197 
// Converts a raw varint (in val->uint64_val) into the in-memory form for the
// given descriptor type:
//   - bool:               normalized to 0/1 in bool_val
//   - sint32 / sint64:    ZigZag-decoded
//   - int32/uint32/enum:  narrowed via _upb_Decoder_MungeInt32()
// All other types are left untouched.
static void _upb_Decoder_Munge(int type, wireval* val) {
  if (type == kUpb_FieldType_Bool) {
    val->bool_val = val->uint64_val != 0;
  } else if (type == kUpb_FieldType_SInt32) {
    const uint32_t zz = val->uint64_val;
    val->uint32_val = (zz >> 1) ^ -(int32_t)(zz & 1);
  } else if (type == kUpb_FieldType_SInt64) {
    const uint64_t zz = val->uint64_val;
    val->uint64_val = (zz >> 1) ^ -(int64_t)(zz & 1);
  } else if (type == kUpb_FieldType_Int32 || type == kUpb_FieldType_UInt32 ||
             type == kUpb_FieldType_Enum) {
    _upb_Decoder_MungeInt32(val);
  }
}
220 
_upb_Decoder_NewSubMessage(upb_Decoder * d,const upb_MiniTableSub * subs,const upb_MiniTableField * field)221 static upb_Message* _upb_Decoder_NewSubMessage(
222     upb_Decoder* d, const upb_MiniTableSub* subs,
223     const upb_MiniTableField* field) {
224   const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg;
225   UPB_ASSERT(subl);
226   upb_Message* msg = _upb_Message_New(subl, &d->arena);
227   if (!msg) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
228   return msg;
229 }
230 
// Reads `size` bytes of string data at `ptr` through
// upb_EpsCopyInputStream_ReadString() and stores the resulting view in
// `*str`.  Returns the position after the string; a NULL result from the
// stream is reported as kUpb_DecodeStatus_OutOfMemory.
static const char* _upb_Decoder_ReadString(upb_Decoder* d, const char* ptr,
                                           int size, upb_StringView* str) {
  // The stream may update `data` to point somewhere other than the input.
  const char* data = ptr;
  const char* next =
      upb_EpsCopyInputStream_ReadString(&d->input, &data, size, &d->arena);
  if (next == NULL) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);

  str->size = size;
  str->data = data;
  return next;
}
240 
241 UPB_FORCEINLINE
_upb_Decoder_RecurseSubMessage(upb_Decoder * d,const char * ptr,upb_Message * submsg,const upb_MiniTable * subl,uint32_t expected_end_group)242 static const char* _upb_Decoder_RecurseSubMessage(upb_Decoder* d,
243                                                   const char* ptr,
244                                                   upb_Message* submsg,
245                                                   const upb_MiniTable* subl,
246                                                   uint32_t expected_end_group) {
247   if (--d->depth < 0) {
248     _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_MaxDepthExceeded);
249   }
250   ptr = _upb_Decoder_DecodeMessage(d, ptr, submsg, subl);
251   d->depth++;
252   if (d->end_group != expected_end_group) {
253     _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
254   }
255   return ptr;
256 }
257 
258 UPB_FORCEINLINE
_upb_Decoder_DecodeSubMessage(upb_Decoder * d,const char * ptr,upb_Message * submsg,const upb_MiniTableSub * subs,const upb_MiniTableField * field,int size)259 static const char* _upb_Decoder_DecodeSubMessage(
260     upb_Decoder* d, const char* ptr, upb_Message* submsg,
261     const upb_MiniTableSub* subs, const upb_MiniTableField* field, int size) {
262   int saved_delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, size);
263   const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg;
264   UPB_ASSERT(subl);
265   ptr = _upb_Decoder_RecurseSubMessage(d, ptr, submsg, subl, DECODE_NOGROUP);
266   upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_delta);
267   return ptr;
268 }
269 
270 UPB_FORCEINLINE
_upb_Decoder_DecodeGroup(upb_Decoder * d,const char * ptr,upb_Message * submsg,const upb_MiniTable * subl,uint32_t number)271 static const char* _upb_Decoder_DecodeGroup(upb_Decoder* d, const char* ptr,
272                                             upb_Message* submsg,
273                                             const upb_MiniTable* subl,
274                                             uint32_t number) {
275   if (_upb_Decoder_IsDone(d, &ptr)) {
276     _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
277   }
278   ptr = _upb_Decoder_RecurseSubMessage(d, ptr, submsg, subl, number);
279   d->end_group = DECODE_NOGROUP;
280   return ptr;
281 }
282 
283 UPB_FORCEINLINE
_upb_Decoder_DecodeUnknownGroup(upb_Decoder * d,const char * ptr,uint32_t number)284 static const char* _upb_Decoder_DecodeUnknownGroup(upb_Decoder* d,
285                                                    const char* ptr,
286                                                    uint32_t number) {
287   return _upb_Decoder_DecodeGroup(d, ptr, NULL, NULL, number);
288 }
289 
290 UPB_FORCEINLINE
_upb_Decoder_DecodeKnownGroup(upb_Decoder * d,const char * ptr,upb_Message * submsg,const upb_MiniTableSub * subs,const upb_MiniTableField * field)291 static const char* _upb_Decoder_DecodeKnownGroup(
292     upb_Decoder* d, const char* ptr, upb_Message* submsg,
293     const upb_MiniTableSub* subs, const upb_MiniTableField* field) {
294   const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg;
295   UPB_ASSERT(subl);
296   return _upb_Decoder_DecodeGroup(d, ptr, submsg, subl, field->number);
297 }
298 
// Writes `val` at `ptr` as a base-128 varint (least-significant group first,
// 0x80 set on every byte except the last) and returns the position just
// after the last byte written.  At most 5 bytes are produced.
static char* upb_Decoder_EncodeVarint32(uint32_t val, char* ptr) {
  for (;;) {
    uint8_t group = val & 0x7fU;
    val >>= 7;
    if (val == 0) {
      // Final group: no continuation bit.
      *ptr++ = group;
      return ptr;
    }
    group |= 0x80U;
    *ptr++ = group;
  }
}
308 
// Appends two 32-bit varints, `val1` followed by `val2`, to the unknown
// field data of `msg`.  Aborts with kUpb_DecodeStatus_OutOfMemory when the
// data cannot be added.
static void _upb_Decoder_AddUnknownVarints(upb_Decoder* d, upb_Message* msg,
                                           uint32_t val1, uint32_t val2) {
  // Two 32-bit varints need at most 10 bytes, so 20 is ample.
  char scratch[20];
  char* tail = upb_Decoder_EncodeVarint32(val1, scratch);
  tail = upb_Decoder_EncodeVarint32(val2, tail);

  if (_upb_Message_AddUnknown(msg, scratch, tail - scratch, &d->arena)) return;
  _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
}
320 
321 UPB_NOINLINE
_upb_Decoder_CheckEnumSlow(upb_Decoder * d,const char * ptr,upb_Message * msg,const upb_MiniTableEnum * e,const upb_MiniTableField * field,uint32_t v)322 static bool _upb_Decoder_CheckEnumSlow(upb_Decoder* d, const char* ptr,
323                                        upb_Message* msg,
324                                        const upb_MiniTableEnum* e,
325                                        const upb_MiniTableField* field,
326                                        uint32_t v) {
327   if (_upb_MiniTable_CheckEnumValueSlow(e, v)) return true;
328 
329   // Unrecognized enum goes into unknown fields.
330   // For packed fields the tag could be arbitrarily far in the past, so we
331   // just re-encode the tag and value here.
332   uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Varint;
333   upb_Message* unknown_msg =
334       field->mode & kUpb_LabelFlags_IsExtension ? d->unknown_msg : msg;
335   _upb_Decoder_AddUnknownVarints(d, unknown_msg, tag, v);
336   return false;
337 }
338 
339 UPB_FORCEINLINE
_upb_Decoder_CheckEnum(upb_Decoder * d,const char * ptr,upb_Message * msg,const upb_MiniTableEnum * e,const upb_MiniTableField * field,wireval * val)340 static bool _upb_Decoder_CheckEnum(upb_Decoder* d, const char* ptr,
341                                    upb_Message* msg, const upb_MiniTableEnum* e,
342                                    const upb_MiniTableField* field,
343                                    wireval* val) {
344   uint32_t v = val->uint32_val;
345 
346   _kUpb_FastEnumCheck_Status status = _upb_MiniTable_CheckEnumValueFast(e, v);
347   if (UPB_LIKELY(status == _kUpb_FastEnumCheck_ValueIsInEnum)) return true;
348   return _upb_Decoder_CheckEnumSlow(d, ptr, msg, e, field, v);
349 }
350 
351 UPB_NOINLINE
_upb_Decoder_DecodeEnumArray(upb_Decoder * d,const char * ptr,upb_Message * msg,upb_Array * arr,const upb_MiniTableSub * subs,const upb_MiniTableField * field,wireval * val)352 static const char* _upb_Decoder_DecodeEnumArray(upb_Decoder* d, const char* ptr,
353                                                 upb_Message* msg,
354                                                 upb_Array* arr,
355                                                 const upb_MiniTableSub* subs,
356                                                 const upb_MiniTableField* field,
357                                                 wireval* val) {
358   const upb_MiniTableEnum* e = subs[field->UPB_PRIVATE(submsg_index)].subenum;
359   if (!_upb_Decoder_CheckEnum(d, ptr, msg, e, field, val)) return ptr;
360   void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void);
361   arr->size++;
362   memcpy(mem, val, 4);
363   return ptr;
364 }
365 
366 UPB_FORCEINLINE
_upb_Decoder_DecodeFixedPacked(upb_Decoder * d,const char * ptr,upb_Array * arr,wireval * val,const upb_MiniTableField * field,int lg2)367 static const char* _upb_Decoder_DecodeFixedPacked(
368     upb_Decoder* d, const char* ptr, upb_Array* arr, wireval* val,
369     const upb_MiniTableField* field, int lg2) {
370   int mask = (1 << lg2) - 1;
371   size_t count = val->size >> lg2;
372   if ((val->size & mask) != 0) {
373     // Length isn't a round multiple of elem size.
374     _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
375   }
376   _upb_Decoder_Reserve(d, arr, count);
377   void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void);
378   arr->size += count;
379   // Note: if/when the decoder supports multi-buffer input, we will need to
380   // handle buffer seams here.
381   if (_upb_IsLittleEndian()) {
382     ptr = upb_EpsCopyInputStream_Copy(&d->input, ptr, mem, val->size);
383   } else {
384     int delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size);
385     char* dst = mem;
386     while (!_upb_Decoder_IsDone(d, &ptr)) {
387       if (lg2 == 2) {
388         ptr = upb_WireReader_ReadFixed32(ptr, dst);
389         dst += 4;
390       } else {
391         UPB_ASSERT(lg2 == 3);
392         ptr = upb_WireReader_ReadFixed64(ptr, dst);
393         dst += 8;
394       }
395     }
396     upb_EpsCopyInputStream_PopLimit(&d->input, ptr, delta);
397   }
398 
399   return ptr;
400 }
401 
402 UPB_FORCEINLINE
_upb_Decoder_DecodeVarintPacked(upb_Decoder * d,const char * ptr,upb_Array * arr,wireval * val,const upb_MiniTableField * field,int lg2)403 static const char* _upb_Decoder_DecodeVarintPacked(
404     upb_Decoder* d, const char* ptr, upb_Array* arr, wireval* val,
405     const upb_MiniTableField* field, int lg2) {
406   int scale = 1 << lg2;
407   int saved_limit = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size);
408   char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void);
409   while (!_upb_Decoder_IsDone(d, &ptr)) {
410     wireval elem;
411     ptr = _upb_Decoder_DecodeVarint(d, ptr, &elem.uint64_val);
412     _upb_Decoder_Munge(field->UPB_PRIVATE(descriptortype), &elem);
413     if (_upb_Decoder_Reserve(d, arr, 1)) {
414       out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void);
415     }
416     arr->size++;
417     memcpy(out, &elem, scale);
418     out += scale;
419   }
420   upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_limit);
421   return ptr;
422 }
423 
424 UPB_NOINLINE
_upb_Decoder_DecodeEnumPacked(upb_Decoder * d,const char * ptr,upb_Message * msg,upb_Array * arr,const upb_MiniTableSub * subs,const upb_MiniTableField * field,wireval * val)425 static const char* _upb_Decoder_DecodeEnumPacked(
426     upb_Decoder* d, const char* ptr, upb_Message* msg, upb_Array* arr,
427     const upb_MiniTableSub* subs, const upb_MiniTableField* field,
428     wireval* val) {
429   const upb_MiniTableEnum* e = subs[field->UPB_PRIVATE(submsg_index)].subenum;
430   int saved_limit = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size);
431   char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void);
432   while (!_upb_Decoder_IsDone(d, &ptr)) {
433     wireval elem;
434     ptr = _upb_Decoder_DecodeVarint(d, ptr, &elem.uint64_val);
435     _upb_Decoder_MungeInt32(&elem);
436     if (!_upb_Decoder_CheckEnum(d, ptr, msg, e, field, &elem)) {
437       continue;
438     }
439     if (_upb_Decoder_Reserve(d, arr, 1)) {
440       out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void);
441     }
442     arr->size++;
443     memcpy(out, &elem, 4);
444     out += 4;
445   }
446   upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_limit);
447   return ptr;
448 }
449 
_upb_Decoder_CreateArray(upb_Decoder * d,const upb_MiniTableField * field)450 upb_Array* _upb_Decoder_CreateArray(upb_Decoder* d,
451                                     const upb_MiniTableField* field) {
452   /* Maps descriptor type -> elem_size_lg2.  */
453   static const uint8_t kElemSizeLg2[] = {
454       [0] = -1,  // invalid descriptor type
455       [kUpb_FieldType_Double] = 3,
456       [kUpb_FieldType_Float] = 2,
457       [kUpb_FieldType_Int64] = 3,
458       [kUpb_FieldType_UInt64] = 3,
459       [kUpb_FieldType_Int32] = 2,
460       [kUpb_FieldType_Fixed64] = 3,
461       [kUpb_FieldType_Fixed32] = 2,
462       [kUpb_FieldType_Bool] = 0,
463       [kUpb_FieldType_String] = UPB_SIZE(3, 4),
464       [kUpb_FieldType_Group] = UPB_SIZE(2, 3),
465       [kUpb_FieldType_Message] = UPB_SIZE(2, 3),
466       [kUpb_FieldType_Bytes] = UPB_SIZE(3, 4),
467       [kUpb_FieldType_UInt32] = 2,
468       [kUpb_FieldType_Enum] = 2,
469       [kUpb_FieldType_SFixed32] = 2,
470       [kUpb_FieldType_SFixed64] = 3,
471       [kUpb_FieldType_SInt32] = 2,
472       [kUpb_FieldType_SInt64] = 3,
473   };
474 
475   size_t lg2 = kElemSizeLg2[field->UPB_PRIVATE(descriptortype)];
476   upb_Array* ret = _upb_Array_New(&d->arena, 4, lg2);
477   if (!ret) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
478   return ret;
479 }
480 
// Decodes one wire value (or one packed run) for a repeated field and appends
// the result to the field's array inside `msg`.
//
// The array is created on first use; an existing array is grown so that at
// least one more element fits.  `op` selects how `val` / the bytes at `ptr`
// are interpreted.  Returns the input position after the consumed data.
static const char* _upb_Decoder_DecodeToArray(upb_Decoder* d, const char* ptr,
                                              upb_Message* msg,
                                              const upb_MiniTableSub* subs,
                                              const upb_MiniTableField* field,
                                              wireval* val, int op) {
  upb_Array** slot = UPB_PTR_AT(msg, field->offset, void);
  upb_Array* arr = *slot;

  if (arr == NULL) {
    arr = _upb_Decoder_CreateArray(d, field);
    *slot = arr;
  } else {
    _upb_Decoder_Reserve(d, arr, 1);
  }

  switch (op) {
    case kUpb_DecodeOp_Scalar1Byte:
    case kUpb_DecodeOp_Scalar4Byte:
    case kUpb_DecodeOp_Scalar8Byte: {
      // Append scalar value.  For these ops the op value itself is lg2 of
      // the element size.
      void* elem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << op, void);
      arr->size++;
      memcpy(elem, val, 1 << op);
      return ptr;
    }

    case kUpb_DecodeOp_String:
      _upb_Decoder_VerifyUtf8(d, ptr, val->size);
      /* Fallthrough. */
    case kUpb_DecodeOp_Bytes: {
      // Append bytes.
      upb_StringView* view = (upb_StringView*)_upb_array_ptr(arr) + arr->size;
      arr->size++;
      return _upb_Decoder_ReadString(d, ptr, val->size, view);
    }

    case kUpb_DecodeOp_SubMessage: {
      // Append submessage / group.
      upb_Message* child = _upb_Decoder_NewSubMessage(d, subs, field);
      upb_Message** child_slot = UPB_PTR_AT(
          _upb_array_ptr(arr), arr->size * sizeof(void*), upb_Message*);
      *child_slot = child;
      arr->size++;

      const bool is_group = UPB_UNLIKELY(field->UPB_PRIVATE(descriptortype) ==
                                         kUpb_FieldType_Group);
      if (is_group) {
        return _upb_Decoder_DecodeKnownGroup(d, ptr, child, subs, field);
      }
      return _upb_Decoder_DecodeSubMessage(d, ptr, child, subs, field,
                                           val->size);
    }

    case OP_FIXPCK_LG2(2):
    case OP_FIXPCK_LG2(3): {
      const int lg2 = op - OP_FIXPCK_LG2(0);
      return _upb_Decoder_DecodeFixedPacked(d, ptr, arr, val, field, lg2);
    }

    case OP_VARPCK_LG2(0):
    case OP_VARPCK_LG2(2):
    case OP_VARPCK_LG2(3): {
      const int lg2 = op - OP_VARPCK_LG2(0);
      return _upb_Decoder_DecodeVarintPacked(d, ptr, arr, val, field, lg2);
    }

    case kUpb_DecodeOp_Enum:
      return _upb_Decoder_DecodeEnumArray(d, ptr, msg, arr, subs, field, val);

    case kUpb_DecodeOp_PackedEnum:
      return _upb_Decoder_DecodeEnumPacked(d, ptr, msg, arr, subs, field, val);

    default:
      UPB_UNREACHABLE();
  }
}
546 
_upb_Decoder_CreateMap(upb_Decoder * d,const upb_MiniTable * entry)547 upb_Map* _upb_Decoder_CreateMap(upb_Decoder* d, const upb_MiniTable* entry) {
548   /* Maps descriptor type -> upb map size.  */
549   static const uint8_t kSizeInMap[] = {
550       [0] = -1,  // invalid descriptor type */
551       [kUpb_FieldType_Double] = 8,
552       [kUpb_FieldType_Float] = 4,
553       [kUpb_FieldType_Int64] = 8,
554       [kUpb_FieldType_UInt64] = 8,
555       [kUpb_FieldType_Int32] = 4,
556       [kUpb_FieldType_Fixed64] = 8,
557       [kUpb_FieldType_Fixed32] = 4,
558       [kUpb_FieldType_Bool] = 1,
559       [kUpb_FieldType_String] = UPB_MAPTYPE_STRING,
560       [kUpb_FieldType_Group] = sizeof(void*),
561       [kUpb_FieldType_Message] = sizeof(void*),
562       [kUpb_FieldType_Bytes] = UPB_MAPTYPE_STRING,
563       [kUpb_FieldType_UInt32] = 4,
564       [kUpb_FieldType_Enum] = 4,
565       [kUpb_FieldType_SFixed32] = 4,
566       [kUpb_FieldType_SFixed64] = 8,
567       [kUpb_FieldType_SInt32] = 4,
568       [kUpb_FieldType_SInt64] = 8,
569   };
570 
571   const upb_MiniTableField* key_field = &entry->fields[0];
572   const upb_MiniTableField* val_field = &entry->fields[1];
573   char key_size = kSizeInMap[key_field->UPB_PRIVATE(descriptortype)];
574   char val_size = kSizeInMap[val_field->UPB_PRIVATE(descriptortype)];
575   UPB_ASSERT(key_field->offset == offsetof(upb_MapEntryData, k));
576   UPB_ASSERT(val_field->offset == offsetof(upb_MapEntryData, v));
577   upb_Map* ret = _upb_Map_New(&d->arena, key_size, val_size);
578   if (!ret) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
579   return ret;
580 }
581 
// Decodes one map entry (a length-delimited sub-message of val->size bytes)
// for the map field `field` of `msg`.
//
// The map is created on first use.  The entry is parsed into a temporary
// upb_MapEntry.  If the entry turned out to contain unknown fields it is not
// inserted; instead the whole entry is re-encoded and appended to the
// unknown fields of `msg` under the map field's tag.  Otherwise the key/value
// pair is inserted into the map.
//
// Any allocation failure aborts decoding with kUpb_DecodeStatus_OutOfMemory.
// Returns the input position after the entry.
static const char* _upb_Decoder_DecodeToMap(upb_Decoder* d, const char* ptr,
                                            upb_Message* msg,
                                            const upb_MiniTableSub* subs,
                                            const upb_MiniTableField* field,
                                            wireval* val) {
  upb_Map** map_p = UPB_PTR_AT(msg, field->offset, upb_Map*);
  upb_Map* map = *map_p;
  upb_MapEntry ent;
  UPB_ASSERT(upb_MiniTableField_Type(field) == kUpb_FieldType_Message);
  const upb_MiniTable* entry = subs[field->UPB_PRIVATE(submsg_index)].submsg;

  UPB_ASSERT(entry->field_count == 2);
  UPB_ASSERT(!upb_IsRepeatedOrMap(&entry->fields[0]));
  UPB_ASSERT(!upb_IsRepeatedOrMap(&entry->fields[1]));

  if (!map) {
    map = _upb_Decoder_CreateMap(d, entry);
    *map_p = map;
  }

  // Parse map entry.
  memset(&ent, 0, sizeof(ent));

  if (entry->fields[1].UPB_PRIVATE(descriptortype) == kUpb_FieldType_Message ||
      entry->fields[1].UPB_PRIVATE(descriptortype) == kUpb_FieldType_Group) {
    const upb_MiniTable* submsg_table = entry->subs[0].submsg;
    // Any sub-message entry must be linked.  We do not allow dynamic tree
    // shaking in this case.
    UPB_ASSERT(submsg_table);

    // Create proactively to handle the case where it doesn't appear.  The
    // allocation must be checked: otherwise an out-of-memory condition would
    // silently leave a NULL message as the map value.
    upb_Message* value_msg = _upb_Message_New(submsg_table, &d->arena);
    if (!value_msg) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
    ent.data.v.val = upb_value_ptr(value_msg);
  }

  ptr =
      _upb_Decoder_DecodeSubMessage(d, ptr, &ent.data, subs, field, val->size);

  // Check whether the entry picked up any unknown fields while parsing.
  size_t unknown_size;
  upb_Message_GetUnknown(&ent.data, &unknown_size);
  if (unknown_size != 0) {
    // Preserve the entry verbatim: re-encode it and store it, prefixed with
    // the map field's length-delimited tag and the encoded length, in the
    // unknown fields of the parent message.
    char* buf;
    size_t encoded_size;
    uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Delimited;
    upb_EncodeStatus status =
        upb_Encode(&ent.data, entry, 0, &d->arena, &buf, &encoded_size);
    if (status != kUpb_EncodeStatus_Ok) {
      _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
    }
    _upb_Decoder_AddUnknownVarints(d, msg, tag, encoded_size);
    if (!_upb_Message_AddUnknown(msg, buf, encoded_size, &d->arena)) {
      _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
    }
  } else {
    if (_upb_Map_Insert(map, &ent.data.k, map->key_size, &ent.data.v,
                        map->val_size,
                        &d->arena) == kUpb_MapInsertStatus_OutOfMemory) {
      _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
    }
  }
  return ptr;
}
643 
// Decodes one wire value for a non-repeated field and stores it into `msg`
// at the field's offset.
//
// Enum values are validated first; a rejected enum value leaves the field
// (and its presence information) untouched.  Presence is then recorded,
// either as a hasbit or as the oneof case.  When a oneof switches to a
// sub-message member, the stale pointer in the shared storage is cleared so
// that a fresh sub-message gets created.  Returns the input position after
// the consumed data.
static const char* _upb_Decoder_DecodeToSubMessage(
    upb_Decoder* d, const char* ptr, upb_Message* msg,
    const upb_MiniTableSub* subs, const upb_MiniTableField* field, wireval* val,
    int op) {
  void* dst = UPB_PTR_AT(msg, field->offset, void);

  if (UPB_UNLIKELY(op == kUpb_DecodeOp_Enum)) {
    const upb_MiniTableEnum* enum_table =
        subs[field->UPB_PRIVATE(submsg_index)].subenum;
    if (!_upb_Decoder_CheckEnum(d, ptr, msg, enum_table, field, val)) {
      return ptr;
    }
  }

  /* Set presence if necessary. */
  if (field->presence > 0) {
    _upb_sethas_field(msg, field);
  } else if (field->presence < 0) {
    /* Oneof case */
    uint32_t* active_case = _upb_oneofcase_field(msg, field);
    const bool switching_member = *active_case != field->number;
    if (op == kUpb_DecodeOp_SubMessage && switching_member) {
      memset(dst, 0, sizeof(void*));
    }
    *active_case = field->number;
  }

  /* Store into message. */
  switch (op) {
    case kUpb_DecodeOp_SubMessage: {
      upb_Message** child_slot = dst;
      if (*child_slot == NULL) {
        *child_slot = _upb_Decoder_NewSubMessage(d, subs, field);
      }
      upb_Message* child = *child_slot;
      if (UPB_UNLIKELY(field->UPB_PRIVATE(descriptortype) ==
                       kUpb_FieldType_Group)) {
        return _upb_Decoder_DecodeKnownGroup(d, ptr, child, subs, field);
      }
      return _upb_Decoder_DecodeSubMessage(d, ptr, child, subs, field,
                                           val->size);
    }

    case kUpb_DecodeOp_String:
      _upb_Decoder_VerifyUtf8(d, ptr, val->size);
      /* Fallthrough. */
    case kUpb_DecodeOp_Bytes:
      return _upb_Decoder_ReadString(d, ptr, val->size, dst);

    case kUpb_DecodeOp_Scalar8Byte:
      memcpy(dst, val, 8);
      return ptr;

    case kUpb_DecodeOp_Enum:
    case kUpb_DecodeOp_Scalar4Byte:
      memcpy(dst, val, 4);
      return ptr;

    case kUpb_DecodeOp_Scalar1Byte:
      memcpy(dst, val, 1);
      return ptr;

    default:
      UPB_UNREACHABLE();
  }
}
708 
709 UPB_NOINLINE
_upb_Decoder_CheckRequired(upb_Decoder * d,const char * ptr,const upb_Message * msg,const upb_MiniTable * l)710 const char* _upb_Decoder_CheckRequired(upb_Decoder* d, const char* ptr,
711                                        const upb_Message* msg,
712                                        const upb_MiniTable* l) {
713   assert(l->required_count);
714   if (UPB_LIKELY((d->options & kUpb_DecodeOption_CheckRequired) == 0)) {
715     return ptr;
716   }
717   uint64_t msg_head;
718   memcpy(&msg_head, msg, 8);
719   msg_head = _upb_BigEndian_Swap64(msg_head);
720   if (upb_MiniTable_requiredmask(l) & ~msg_head) {
721     d->missing_required = true;
722   }
723   return ptr;
724 }
725 
726 UPB_FORCEINLINE
_upb_Decoder_TryFastDispatch(upb_Decoder * d,const char ** ptr,upb_Message * msg,const upb_MiniTable * layout)727 static bool _upb_Decoder_TryFastDispatch(upb_Decoder* d, const char** ptr,
728                                          upb_Message* msg,
729                                          const upb_MiniTable* layout) {
730 #if UPB_FASTTABLE
731   if (layout && layout->table_mask != (unsigned char)-1) {
732     uint16_t tag = _upb_FastDecoder_LoadTag(*ptr);
733     intptr_t table = decode_totable(layout);
734     *ptr = _upb_FastDecoder_TagDispatch(d, *ptr, msg, table, 0, tag);
735     return true;
736   }
737 #endif
738   return false;
739 }
740 
// Skips over the value of one field whose tag (`tag`) has already been read.
//
// `ptr` points at the first byte after the tag. Returns the position just
// past the value. The wire type is taken from the low 3 bits of `tag` and the
// field number from the remaining bits. Any wire type not handled below
// results in _upb_Decoder_ErrorJmp(kUpb_DecodeStatus_Malformed).
static const char* upb_Decoder_SkipField(upb_Decoder* d, const char* ptr,
                                         uint32_t tag) {
  const int wt = tag & 7;

  if (wt == kUpb_WireType_Varint) {
    uint64_t discarded;
    return _upb_Decoder_DecodeVarint(d, ptr, &discarded);
  }
  if (wt == kUpb_WireType_64Bit) {
    return ptr + 8;
  }
  if (wt == kUpb_WireType_32Bit) {
    return ptr + 4;
  }
  if (wt == kUpb_WireType_Delimited) {
    uint32_t len;
    ptr = upb_Decoder_DecodeSize(d, ptr, &len);
    return ptr + len;
  }
  if (wt == kUpb_WireType_StartGroup) {
    const int group_number = tag >> 3;
    return _upb_Decoder_DecodeUnknownGroup(d, ptr, group_number);
  }

  _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
}
765 
766 enum {
767   kStartItemTag = ((kUpb_MsgSet_Item << 3) | kUpb_WireType_StartGroup),
768   kEndItemTag = ((kUpb_MsgSet_Item << 3) | kUpb_WireType_EndGroup),
769   kTypeIdTag = ((kUpb_MsgSet_TypeId << 3) | kUpb_WireType_Varint),
770   kMessageTag = ((kUpb_MsgSet_Message << 3) | kUpb_WireType_Delimited),
771 };
772 
// Decodes a MessageSet payload whose extension (`item_mt`) is registered.
//
// Gets or creates the extension slot on `msg`, allocates a new sub-message,
// parses `data[0..size)` into it with a nested upb_Decode() call (same
// extension registry, options and arena as `d`), and stores the sub-message
// pointer in the extension slot. The pointer is stored before the status is
// examined, so it is written even when the nested decode fails.
//
// Jumps out via _upb_Decoder_ErrorJmp() with kUpb_DecodeStatus_OutOfMemory if
// the slot cannot be obtained, or with the nested status if it is not Ok.
static void upb_Decoder_AddKnownMessageSetItem(
    upb_Decoder* d, upb_Message* msg, const upb_MiniTableExtension* item_mt,
    const char* data, uint32_t size) {
  upb_Message_Extension* slot =
      _upb_Message_GetOrCreateExtension(msg, item_mt, &d->arena);
  if (UPB_UNLIKELY(slot == NULL)) {
    _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
  }

  upb_Message* payload_msg =
      _upb_Decoder_NewSubMessage(d, &slot->ext->sub, &slot->ext->field);

  upb_DecodeStatus nested_status =
      upb_Decode(data, size, payload_msg, item_mt->sub.submsg, d->extreg,
                 d->options, &d->arena);

  memcpy(&slot->data, &payload_msg, sizeof(payload_msg));

  if (nested_status != kUpb_DecodeStatus_Ok) {
    _upb_Decoder_ErrorJmp(d, nested_status);
  }
}
788 
// Stores a MessageSet item with an unregistered type id as unknown data.
//
// The item is re-emitted on `msg` as three unknown-field chunks:
//   1. a header built here: start-item tag, type-id tag, `type_id`,
//      message tag, `message_size` (each written as a varint32);
//   2. the payload bytes `message_data[0..message_size)` as given;
//   3. a trailer built here: the end-item tag.
//
// Jumps out with kUpb_DecodeStatus_OutOfMemory if any chunk cannot be added.
static void upb_Decoder_AddUnknownMessageSetItem(upb_Decoder* d,
                                                 upb_Message* msg,
                                                 uint32_t type_id,
                                                 const char* message_data,
                                                 uint32_t message_size) {
  char scratch[60];
  char* out = scratch;

  // Header: everything that precedes the payload bytes.
  out = upb_Decoder_EncodeVarint32(kStartItemTag, out);
  out = upb_Decoder_EncodeVarint32(kTypeIdTag, out);
  out = upb_Decoder_EncodeVarint32(type_id, out);
  out = upb_Decoder_EncodeVarint32(kMessageTag, out);
  out = upb_Decoder_EncodeVarint32(message_size, out);
  char* trailer = out;

  // Trailer: the end-group tag that follows the payload.
  out = upb_Decoder_EncodeVarint32(kEndItemTag, out);
  char* trailer_end = out;

  const bool stored =
      _upb_Message_AddUnknown(msg, scratch, trailer - scratch, &d->arena) &&
      _upb_Message_AddUnknown(msg, message_data, message_size, &d->arena) &&
      _upb_Message_AddUnknown(msg, trailer, trailer_end - trailer, &d->arena);
  if (!stored) {
    _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
  }
}
812 
// Adds one MessageSet item (`type_id` + payload `data[0..size)`) to `msg`.
//
// Looks `type_id` up in d->extreg as an extension of `t`. If an extension is
// found the payload is decoded as that extension; otherwise the item is kept
// as unknown data.
static void upb_Decoder_AddMessageSetItem(upb_Decoder* d, upb_Message* msg,
                                          const upb_MiniTable* t,
                                          uint32_t type_id, const char* data,
                                          uint32_t size) {
  const upb_MiniTableExtension* known_ext =
      upb_ExtensionRegistry_Lookup(d->extreg, t, type_id);

  if (known_ext == NULL) {
    upb_Decoder_AddUnknownMessageSetItem(d, msg, type_id, data, size);
    return;
  }
  upb_Decoder_AddKnownMessageSetItem(d, msg, known_ext, data, size);
}
825 
// Parses the body of a MessageSet item group; `ptr` is just past the
// start-item tag. Returns the position just past the end-item tag.
//
// Inside the group:
//   - the first type-id field and the first message field are used; later
//     duplicates of either are parsed and ignored;
//   - the two may arrive in either order: a payload seen before its type id
//     is remembered (pointer + size) and added once the id shows up;
//   - any other field is skipped and not preserved.
//
// If the input is exhausted before an end-item tag is seen, jumps out with
// kUpb_DecodeStatus_Malformed.
static const char* upb_Decoder_DecodeMessageSetItem(
    upb_Decoder* d, const char* ptr, upb_Message* msg,
    const upb_MiniTable* layout) {
  uint32_t type_id = 0;
  upb_StringView pending = {NULL, 0};
  bool have_id = false;
  bool have_payload = false;

  while (!_upb_Decoder_IsDone(d, &ptr)) {
    uint32_t tag;
    ptr = _upb_Decoder_DecodeTag(d, ptr, &tag);

    if (tag == kEndItemTag) {
      return ptr;
    }

    if (tag == kTypeIdTag) {
      uint64_t raw_id;
      ptr = _upb_Decoder_DecodeVarint(d, ptr, &raw_id);
      if (have_id) continue;  // Ignore dup.
      have_id = true;
      type_id = raw_id;
      if (have_payload) {
        // The payload arrived first; it can be added now that the id is known.
        upb_Decoder_AddMessageSetItem(d, msg, layout, type_id, pending.data,
                                      pending.size);
      }
      continue;
    }

    if (tag == kMessageTag) {
      uint32_t len;
      ptr = upb_Decoder_DecodeSize(d, ptr, &len);
      const char* payload = ptr;
      ptr += len;
      if (have_payload) continue;  // Ignore dup.
      have_payload = true;
      if (have_id) {
        upb_Decoder_AddMessageSetItem(d, msg, layout, type_id, payload, len);
      } else {
        // Out of order, we must preserve the payload.
        pending.data = payload;
        pending.size = len;
      }
      continue;
    }

    // We do not preserve unexpected fields inside a message set item.
    ptr = upb_Decoder_SkipField(d, ptr, tag);
  }

  _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
}
878 
_upb_Decoder_FindField(upb_Decoder * d,const upb_MiniTable * t,uint32_t field_number,int * last_field_index)879 static const upb_MiniTableField* _upb_Decoder_FindField(upb_Decoder* d,
880                                                         const upb_MiniTable* t,
881                                                         uint32_t field_number,
882                                                         int* last_field_index) {
883   static upb_MiniTableField none = {
884       0, 0, 0, 0, kUpb_FakeFieldType_FieldNotFound, 0};
885   if (t == NULL) return &none;
886 
887   size_t idx = ((size_t)field_number) - 1;  // 0 wraps to SIZE_MAX
888   if (idx < t->dense_below) {
889     /* Fastest case: index into dense fields. */
890     goto found;
891   }
892 
893   if (t->dense_below < t->field_count) {
894     /* Linear search non-dense fields. Resume scanning from last_field_index
895      * since fields are usually in order. */
896     size_t last = *last_field_index;
897     for (idx = last; idx < t->field_count; idx++) {
898       if (t->fields[idx].number == field_number) {
899         goto found;
900       }
901     }
902 
903     for (idx = t->dense_below; idx < last; idx++) {
904       if (t->fields[idx].number == field_number) {
905         goto found;
906       }
907     }
908   }
909 
910   if (d->extreg) {
911     switch (t->ext) {
912       case kUpb_ExtMode_Extendable: {
913         const upb_MiniTableExtension* ext =
914             upb_ExtensionRegistry_Lookup(d->extreg, t, field_number);
915         if (ext) return &ext->field;
916         break;
917       }
918       case kUpb_ExtMode_IsMessageSet:
919         if (field_number == kUpb_MsgSet_Item) {
920           static upb_MiniTableField item = {
921               0, 0, 0, 0, kUpb_FakeFieldType_MessageSetItem, 0};
922           return &item;
923         }
924         break;
925     }
926   }
927 
928   return &none; /* Unknown field. */
929 
930 found:
931   UPB_ASSERT(t->fields[idx].number == field_number);
932   *last_field_index = idx;
933   return &t->fields[idx];
934 }
935 
_upb_Decoder_GetVarintOp(const upb_MiniTableField * field)936 int _upb_Decoder_GetVarintOp(const upb_MiniTableField* field) {
937   static const int8_t kVarintOps[] = {
938       [kUpb_FakeFieldType_FieldNotFound] = kUpb_DecodeOp_UnknownField,
939       [kUpb_FieldType_Double] = kUpb_DecodeOp_UnknownField,
940       [kUpb_FieldType_Float] = kUpb_DecodeOp_UnknownField,
941       [kUpb_FieldType_Int64] = kUpb_DecodeOp_Scalar8Byte,
942       [kUpb_FieldType_UInt64] = kUpb_DecodeOp_Scalar8Byte,
943       [kUpb_FieldType_Int32] = kUpb_DecodeOp_Scalar4Byte,
944       [kUpb_FieldType_Fixed64] = kUpb_DecodeOp_UnknownField,
945       [kUpb_FieldType_Fixed32] = kUpb_DecodeOp_UnknownField,
946       [kUpb_FieldType_Bool] = kUpb_DecodeOp_Scalar1Byte,
947       [kUpb_FieldType_String] = kUpb_DecodeOp_UnknownField,
948       [kUpb_FieldType_Group] = kUpb_DecodeOp_UnknownField,
949       [kUpb_FieldType_Message] = kUpb_DecodeOp_UnknownField,
950       [kUpb_FieldType_Bytes] = kUpb_DecodeOp_UnknownField,
951       [kUpb_FieldType_UInt32] = kUpb_DecodeOp_Scalar4Byte,
952       [kUpb_FieldType_Enum] = kUpb_DecodeOp_Enum,
953       [kUpb_FieldType_SFixed32] = kUpb_DecodeOp_UnknownField,
954       [kUpb_FieldType_SFixed64] = kUpb_DecodeOp_UnknownField,
955       [kUpb_FieldType_SInt32] = kUpb_DecodeOp_Scalar4Byte,
956       [kUpb_FieldType_SInt64] = kUpb_DecodeOp_Scalar8Byte,
957       [kUpb_FakeFieldType_MessageSetItem] = kUpb_DecodeOp_UnknownField,
958   };
959 
960   return kVarintOps[field->UPB_PRIVATE(descriptortype)];
961 }
962 
963 UPB_FORCEINLINE
_upb_Decoder_CheckUnlinked(const upb_MiniTable * mt,const upb_MiniTableField * field,int * op)964 static void _upb_Decoder_CheckUnlinked(const upb_MiniTable* mt,
965                                        const upb_MiniTableField* field,
966                                        int* op) {
967   // If sub-message is not linked, treat as unknown.
968   if (field->mode & kUpb_LabelFlags_IsExtension) return;
969   const upb_MiniTableSub* sub = &mt->subs[field->UPB_PRIVATE(submsg_index)];
970   if (sub->submsg) return;
971 #ifndef NDEBUG
972   const upb_MiniTableField* oneof = upb_MiniTable_GetOneof(mt, field);
973   if (oneof) {
974     // All other members of the oneof must be message fields that are also
975     // unlinked.
976     do {
977       assert(upb_MiniTableField_CType(oneof) == kUpb_CType_Message);
978       const upb_MiniTableSub* oneof_sub =
979           &mt->subs[oneof->UPB_PRIVATE(submsg_index)];
980       assert(!oneof_sub);
981     } while (upb_MiniTable_NextOneofField(mt, &oneof));
982   }
983 #endif  // NDEBUG
984   *op = kUpb_DecodeOp_UnknownField;
985 }
986 
_upb_Decoder_GetDelimitedOp(const upb_MiniTable * mt,const upb_MiniTableField * field)987 int _upb_Decoder_GetDelimitedOp(const upb_MiniTable* mt,
988                                 const upb_MiniTableField* field) {
989   enum { kRepeatedBase = 19 };
990 
991   static const int8_t kDelimitedOps[] = {
992       /* For non-repeated field type. */
993       [kUpb_FakeFieldType_FieldNotFound] =
994           kUpb_DecodeOp_UnknownField,  // Field not found.
995       [kUpb_FieldType_Double] = kUpb_DecodeOp_UnknownField,
996       [kUpb_FieldType_Float] = kUpb_DecodeOp_UnknownField,
997       [kUpb_FieldType_Int64] = kUpb_DecodeOp_UnknownField,
998       [kUpb_FieldType_UInt64] = kUpb_DecodeOp_UnknownField,
999       [kUpb_FieldType_Int32] = kUpb_DecodeOp_UnknownField,
1000       [kUpb_FieldType_Fixed64] = kUpb_DecodeOp_UnknownField,
1001       [kUpb_FieldType_Fixed32] = kUpb_DecodeOp_UnknownField,
1002       [kUpb_FieldType_Bool] = kUpb_DecodeOp_UnknownField,
1003       [kUpb_FieldType_String] = kUpb_DecodeOp_String,
1004       [kUpb_FieldType_Group] = kUpb_DecodeOp_UnknownField,
1005       [kUpb_FieldType_Message] = kUpb_DecodeOp_SubMessage,
1006       [kUpb_FieldType_Bytes] = kUpb_DecodeOp_Bytes,
1007       [kUpb_FieldType_UInt32] = kUpb_DecodeOp_UnknownField,
1008       [kUpb_FieldType_Enum] = kUpb_DecodeOp_UnknownField,
1009       [kUpb_FieldType_SFixed32] = kUpb_DecodeOp_UnknownField,
1010       [kUpb_FieldType_SFixed64] = kUpb_DecodeOp_UnknownField,
1011       [kUpb_FieldType_SInt32] = kUpb_DecodeOp_UnknownField,
1012       [kUpb_FieldType_SInt64] = kUpb_DecodeOp_UnknownField,
1013       [kUpb_FakeFieldType_MessageSetItem] = kUpb_DecodeOp_UnknownField,
1014       // For repeated field type. */
1015       [kRepeatedBase + kUpb_FieldType_Double] = OP_FIXPCK_LG2(3),
1016       [kRepeatedBase + kUpb_FieldType_Float] = OP_FIXPCK_LG2(2),
1017       [kRepeatedBase + kUpb_FieldType_Int64] = OP_VARPCK_LG2(3),
1018       [kRepeatedBase + kUpb_FieldType_UInt64] = OP_VARPCK_LG2(3),
1019       [kRepeatedBase + kUpb_FieldType_Int32] = OP_VARPCK_LG2(2),
1020       [kRepeatedBase + kUpb_FieldType_Fixed64] = OP_FIXPCK_LG2(3),
1021       [kRepeatedBase + kUpb_FieldType_Fixed32] = OP_FIXPCK_LG2(2),
1022       [kRepeatedBase + kUpb_FieldType_Bool] = OP_VARPCK_LG2(0),
1023       [kRepeatedBase + kUpb_FieldType_String] = kUpb_DecodeOp_String,
1024       [kRepeatedBase + kUpb_FieldType_Group] = kUpb_DecodeOp_SubMessage,
1025       [kRepeatedBase + kUpb_FieldType_Message] = kUpb_DecodeOp_SubMessage,
1026       [kRepeatedBase + kUpb_FieldType_Bytes] = kUpb_DecodeOp_Bytes,
1027       [kRepeatedBase + kUpb_FieldType_UInt32] = OP_VARPCK_LG2(2),
1028       [kRepeatedBase + kUpb_FieldType_Enum] = kUpb_DecodeOp_PackedEnum,
1029       [kRepeatedBase + kUpb_FieldType_SFixed32] = OP_FIXPCK_LG2(2),
1030       [kRepeatedBase + kUpb_FieldType_SFixed64] = OP_FIXPCK_LG2(3),
1031       [kRepeatedBase + kUpb_FieldType_SInt32] = OP_VARPCK_LG2(2),
1032       [kRepeatedBase + kUpb_FieldType_SInt64] = OP_VARPCK_LG2(3),
1033       // Omitting kUpb_FakeFieldType_MessageSetItem, because we never emit a
1034       // repeated msgset type
1035   };
1036 
1037   int ndx = field->UPB_PRIVATE(descriptortype);
1038   if (upb_FieldMode_Get(field) == kUpb_FieldMode_Array) ndx += kRepeatedBase;
1039   int op = kDelimitedOps[ndx];
1040 
1041   if (op == kUpb_DecodeOp_SubMessage) {
1042     _upb_Decoder_CheckUnlinked(mt, field, &op);
1043   }
1044 
1045   return op;
1046 }
1047 
1048 UPB_FORCEINLINE
_upb_Decoder_DecodeWireValue(upb_Decoder * d,const char * ptr,const upb_MiniTable * mt,const upb_MiniTableField * field,int wire_type,wireval * val,int * op)1049 static const char* _upb_Decoder_DecodeWireValue(upb_Decoder* d, const char* ptr,
1050                                                 const upb_MiniTable* mt,
1051                                                 const upb_MiniTableField* field,
1052                                                 int wire_type, wireval* val,
1053                                                 int* op) {
1054   static const unsigned kFixed32OkMask = (1 << kUpb_FieldType_Float) |
1055                                          (1 << kUpb_FieldType_Fixed32) |
1056                                          (1 << kUpb_FieldType_SFixed32);
1057 
1058   static const unsigned kFixed64OkMask = (1 << kUpb_FieldType_Double) |
1059                                          (1 << kUpb_FieldType_Fixed64) |
1060                                          (1 << kUpb_FieldType_SFixed64);
1061 
1062   switch (wire_type) {
1063     case kUpb_WireType_Varint:
1064       ptr = _upb_Decoder_DecodeVarint(d, ptr, &val->uint64_val);
1065       *op = _upb_Decoder_GetVarintOp(field);
1066       _upb_Decoder_Munge(field->UPB_PRIVATE(descriptortype), val);
1067       return ptr;
1068     case kUpb_WireType_32Bit:
1069       *op = kUpb_DecodeOp_Scalar4Byte;
1070       if (((1 << field->UPB_PRIVATE(descriptortype)) & kFixed32OkMask) == 0) {
1071         *op = kUpb_DecodeOp_UnknownField;
1072       }
1073       return upb_WireReader_ReadFixed32(ptr, &val->uint32_val);
1074     case kUpb_WireType_64Bit:
1075       *op = kUpb_DecodeOp_Scalar8Byte;
1076       if (((1 << field->UPB_PRIVATE(descriptortype)) & kFixed64OkMask) == 0) {
1077         *op = kUpb_DecodeOp_UnknownField;
1078       }
1079       return upb_WireReader_ReadFixed64(ptr, &val->uint64_val);
1080     case kUpb_WireType_Delimited:
1081       ptr = upb_Decoder_DecodeSize(d, ptr, &val->size);
1082       *op = _upb_Decoder_GetDelimitedOp(mt, field);
1083       return ptr;
1084     case kUpb_WireType_StartGroup:
1085       val->uint32_val = field->number;
1086       if (field->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Group) {
1087         *op = kUpb_DecodeOp_SubMessage;
1088         _upb_Decoder_CheckUnlinked(mt, field, op);
1089       } else if (field->UPB_PRIVATE(descriptortype) ==
1090                  kUpb_FakeFieldType_MessageSetItem) {
1091         *op = kUpb_DecodeOp_MessageSetItem;
1092       } else {
1093         *op = kUpb_DecodeOp_UnknownField;
1094       }
1095       return ptr;
1096     default:
1097       break;
1098   }
1099   _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
1100 }
1101 
1102 UPB_FORCEINLINE
_upb_Decoder_DecodeKnownField(upb_Decoder * d,const char * ptr,upb_Message * msg,const upb_MiniTable * layout,const upb_MiniTableField * field,int op,wireval * val)1103 static const char* _upb_Decoder_DecodeKnownField(
1104     upb_Decoder* d, const char* ptr, upb_Message* msg,
1105     const upb_MiniTable* layout, const upb_MiniTableField* field, int op,
1106     wireval* val) {
1107   const upb_MiniTableSub* subs = layout->subs;
1108   uint8_t mode = field->mode;
1109 
1110   if (UPB_UNLIKELY(mode & kUpb_LabelFlags_IsExtension)) {
1111     const upb_MiniTableExtension* ext_layout =
1112         (const upb_MiniTableExtension*)field;
1113     upb_Message_Extension* ext =
1114         _upb_Message_GetOrCreateExtension(msg, ext_layout, &d->arena);
1115     if (UPB_UNLIKELY(!ext)) {
1116       _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
1117     }
1118     d->unknown_msg = msg;
1119     msg = &ext->data;
1120     subs = &ext->ext->sub;
1121   }
1122 
1123   switch (mode & kUpb_FieldMode_Mask) {
1124     case kUpb_FieldMode_Array:
1125       return _upb_Decoder_DecodeToArray(d, ptr, msg, subs, field, val, op);
1126     case kUpb_FieldMode_Map:
1127       return _upb_Decoder_DecodeToMap(d, ptr, msg, subs, field, val);
1128     case kUpb_FieldMode_Scalar:
1129       return _upb_Decoder_DecodeToSubMessage(d, ptr, msg, subs, field, val, op);
1130     default:
1131       UPB_UNREACHABLE();
1132   }
1133 }
1134 
// Walks backwards from `ptr` over the bytes of a varint whose decoded value is
// known to be `val`, and returns a pointer to that varint's first byte.
//
// Each step moves one byte back and folds its low 7 bits into an accumulator
// (previous accumulator shifted left by 7); the walk stops as soon as the
// accumulator equals `val`. At least one byte is always consumed. The caller
// must guarantee that such a varint really ends at `ptr`, since there is no
// other stop condition.
static const char* _upb_Decoder_ReverseSkipVarint(const char* ptr,
                                                  uint32_t val) {
  uint32_t acc = 0;
  for (;;) {
    --ptr;
    acc = (acc << 7) | (uint32_t)(*ptr & 0x7f);
    if (acc == val) {
      return ptr;
    }
  }
}
1145 
// Consumes a field that cannot be stored in a known slot and, when `msg` is
// non-NULL, appends its raw bytes (tag included) to the message's unknown
// data.
//
// `ptr` is the position after the wire value header that the caller already
// read (see `val`); for delimited fields the payload of val.size bytes is
// skipped here. Groups are consumed via _upb_Decoder_DecodeUnknownGroup().
// Field number 0 jumps out with kUpb_DecodeStatus_Malformed; failure to store
// the bytes jumps out with kUpb_DecodeStatus_OutOfMemory.
//
// Returns the position after the field.
static const char* _upb_Decoder_DecodeUnknownField(upb_Decoder* d,
                                                   const char* ptr,
                                                   upb_Message* msg,
                                                   int field_number,
                                                   int wire_type, wireval val) {
  if (field_number == 0) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);

  // Since unknown fields are the uncommon case, we do a little extra work here
  // to walk backwards through the buffer to find the field start.  This frees
  // up a register in the fast paths (when the field is known), which leads to
  // significant speedups in benchmarks.
  const char* field_begin = ptr;

  if (wire_type == kUpb_WireType_Delimited) ptr += val.size;

  if (!msg) {
    // Nothing to preserve; only groups still need to be consumed.
    if (wire_type == kUpb_WireType_StartGroup) {
      ptr = _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number);
    }
    return ptr;
  }

  // Step back over the value (or length prefix) the caller consumed.
  if (wire_type == kUpb_WireType_Varint ||
      wire_type == kUpb_WireType_Delimited) {
    field_begin--;
    while (field_begin[-1] & 0x80) field_begin--;
  } else if (wire_type == kUpb_WireType_32Bit) {
    field_begin -= 4;
  } else if (wire_type == kUpb_WireType_64Bit) {
    field_begin -= 8;
  }
  assert(field_begin == d->debug_valstart);

  // Step back over the tag itself.
  uint32_t tag = ((uint32_t)field_number << 3) | wire_type;
  field_begin = _upb_Decoder_ReverseSkipVarint(field_begin, tag);
  assert(field_begin == d->debug_tagstart);

  if (wire_type == kUpb_WireType_StartGroup) {
    d->unknown = field_begin;
    d->unknown_msg = msg;
    ptr = _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number);
    // Re-read the start: d->unknown is reloaded after the group was parsed.
    field_begin = d->unknown;
    d->unknown = NULL;
  }

  if (!_upb_Message_AddUnknown(msg, field_begin, ptr - field_begin,
                               &d->arena)) {
    _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
  }
  return ptr;
}
1197 
1198 UPB_NOINLINE
_upb_Decoder_DecodeMessage(upb_Decoder * d,const char * ptr,upb_Message * msg,const upb_MiniTable * layout)1199 static const char* _upb_Decoder_DecodeMessage(upb_Decoder* d, const char* ptr,
1200                                               upb_Message* msg,
1201                                               const upb_MiniTable* layout) {
1202   int last_field_index = 0;
1203 
1204 #if UPB_FASTTABLE
1205   // The first time we want to skip fast dispatch, because we may have just been
1206   // invoked by the fast parser to handle a case that it bailed on.
1207   if (!_upb_Decoder_IsDone(d, &ptr)) goto nofast;
1208 #endif
1209 
1210   while (!_upb_Decoder_IsDone(d, &ptr)) {
1211     uint32_t tag;
1212     const upb_MiniTableField* field;
1213     int field_number;
1214     int wire_type;
1215     wireval val;
1216     int op;
1217 
1218     if (_upb_Decoder_TryFastDispatch(d, &ptr, msg, layout)) break;
1219 
1220 #if UPB_FASTTABLE
1221   nofast:
1222 #endif
1223 
1224 #ifndef NDEBUG
1225     d->debug_tagstart = ptr;
1226 #endif
1227 
1228     UPB_ASSERT(ptr < d->input.limit_ptr);
1229     ptr = _upb_Decoder_DecodeTag(d, ptr, &tag);
1230     field_number = tag >> 3;
1231     wire_type = tag & 7;
1232 
1233 #ifndef NDEBUG
1234     d->debug_valstart = ptr;
1235 #endif
1236 
1237     if (wire_type == kUpb_WireType_EndGroup) {
1238       d->end_group = field_number;
1239       return ptr;
1240     }
1241 
1242     field = _upb_Decoder_FindField(d, layout, field_number, &last_field_index);
1243     ptr = _upb_Decoder_DecodeWireValue(d, ptr, layout, field, wire_type, &val,
1244                                        &op);
1245 
1246     if (op >= 0) {
1247       ptr = _upb_Decoder_DecodeKnownField(d, ptr, msg, layout, field, op, &val);
1248     } else {
1249       switch (op) {
1250         case kUpb_DecodeOp_UnknownField:
1251           ptr = _upb_Decoder_DecodeUnknownField(d, ptr, msg, field_number,
1252                                                 wire_type, val);
1253           break;
1254         case kUpb_DecodeOp_MessageSetItem:
1255           ptr = upb_Decoder_DecodeMessageSetItem(d, ptr, msg, layout);
1256           break;
1257       }
1258     }
1259   }
1260 
1261   return UPB_UNLIKELY(layout && layout->required_count)
1262              ? _upb_Decoder_CheckRequired(d, ptr, msg, layout)
1263              : ptr;
1264 }
1265 
// Entry point with the fast-decoder function signature that continues parsing
// in the generic decoder.
//
// ORs `hasbits` into the first 32-bit word of `msg`, converts `table` back to
// a mini-table pointer with decode_totablep(), and continues in
// _upb_Decoder_DecodeMessage(). `data` is unused.
const char* _upb_FastDecoder_DecodeGeneric(struct upb_Decoder* d,
                                           const char* ptr, upb_Message* msg,
                                           intptr_t table, uint64_t hasbits,
                                           uint64_t data) {
  (void)data;

  uint32_t* const head_word = (uint32_t*)msg;
  *head_word |= hasbits;

  return _upb_Decoder_DecodeMessage(d, ptr, msg, decode_totablep(table));
}
1274 
_upb_Decoder_DecodeTop(struct upb_Decoder * d,const char * buf,void * msg,const upb_MiniTable * l)1275 static upb_DecodeStatus _upb_Decoder_DecodeTop(struct upb_Decoder* d,
1276                                                const char* buf, void* msg,
1277                                                const upb_MiniTable* l) {
1278   if (!_upb_Decoder_TryFastDispatch(d, &buf, msg, l)) {
1279     _upb_Decoder_DecodeMessage(d, buf, msg, l);
1280   }
1281   if (d->end_group != DECODE_NOGROUP) return kUpb_DecodeStatus_Malformed;
1282   if (d->missing_required) return kUpb_DecodeStatus_MissingRequired;
1283   return kUpb_DecodeStatus_Ok;
1284 }
1285 
1286 UPB_NOINLINE
_upb_Decoder_IsDoneFallback(upb_EpsCopyInputStream * e,const char * ptr,int overrun)1287 const char* _upb_Decoder_IsDoneFallback(upb_EpsCopyInputStream* e,
1288                                         const char* ptr, int overrun) {
1289   return _upb_EpsCopyInputStream_IsDoneFallbackInline(
1290       e, ptr, overrun, _upb_Decoder_BufferFlipCallback);
1291 }
1292 
upb_Decoder_Decode(upb_Decoder * const decoder,const char * const buf,void * const msg,const upb_MiniTable * const l,upb_Arena * const arena)1293 static upb_DecodeStatus upb_Decoder_Decode(upb_Decoder* const decoder,
1294                                            const char* const buf,
1295                                            void* const msg,
1296                                            const upb_MiniTable* const l,
1297                                            upb_Arena* const arena) {
1298   if (UPB_SETJMP(decoder->err) == 0) {
1299     decoder->status = _upb_Decoder_DecodeTop(decoder, buf, msg, l);
1300   } else {
1301     UPB_ASSERT(decoder->status != kUpb_DecodeStatus_Ok);
1302   }
1303 
1304   _upb_MemBlock* blocks =
1305       upb_Atomic_Load(&decoder->arena.blocks, memory_order_relaxed);
1306   arena->head = decoder->arena.head;
1307   upb_Atomic_Store(&arena->blocks, blocks, memory_order_relaxed);
1308   return decoder->status;
1309 }
1310 
upb_Decode(const char * buf,size_t size,void * msg,const upb_MiniTable * l,const upb_ExtensionRegistry * extreg,int options,upb_Arena * arena)1311 upb_DecodeStatus upb_Decode(const char* buf, size_t size, void* msg,
1312                             const upb_MiniTable* l,
1313                             const upb_ExtensionRegistry* extreg, int options,
1314                             upb_Arena* arena) {
1315   upb_Decoder decoder;
1316   unsigned depth = (unsigned)options >> 16;
1317 
1318   upb_EpsCopyInputStream_Init(&decoder.input, &buf, size,
1319                               options & kUpb_DecodeOption_AliasString);
1320 
1321   decoder.extreg = extreg;
1322   decoder.unknown = NULL;
1323   decoder.depth = depth ? depth : kUpb_WireFormat_DefaultDepthLimit;
1324   decoder.end_group = DECODE_NOGROUP;
1325   decoder.options = (uint16_t)options;
1326   decoder.missing_required = false;
1327   decoder.status = kUpb_DecodeStatus_Ok;
1328 
1329   // Violating the encapsulation of the arena for performance reasons.
1330   // This is a temporary arena that we swap into and swap out of when we are
1331   // done.  The temporary arena only needs to be able to handle allocation,
1332   // not fuse or free, so it does not need many of the members to be initialized
1333   // (particularly parent_or_count).
1334   _upb_MemBlock* blocks = upb_Atomic_Load(&arena->blocks, memory_order_relaxed);
1335   decoder.arena.head = arena->head;
1336   decoder.arena.block_alloc = arena->block_alloc;
1337   upb_Atomic_Init(&decoder.arena.blocks, blocks);
1338 
1339   return upb_Decoder_Decode(&decoder, buf, msg, l, arena);
1340 }
1341 
1342 #undef OP_FIXPCK_LG2
1343 #undef OP_VARPCK_LG2
1344