1 /*
2 * Copyright (c) 2009-2021, Google LLC
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Google LLC nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include "upb/wire/decode.h"
29
30 #include <string.h>
31
32 #include "upb/base/descriptor_constants.h"
33 #include "upb/collections/array_internal.h"
34 #include "upb/collections/map_internal.h"
35 #include "upb/mem/arena_internal.h"
36 #include "upb/mini_table/common.h"
37 #include "upb/mini_table/enum_internal.h"
38 #include "upb/port/atomic.h"
39 #include "upb/wire/common.h"
40 #include "upb/wire/common_internal.h"
41 #include "upb/wire/decode_internal.h"
42 #include "upb/wire/encode.h"
43 #include "upb/wire/eps_copy_input_stream.h"
44 #include "upb/wire/reader.h"
45 #include "upb/wire/swap_internal.h"
46 #include "upb/wire/types.h"
47
48 // Must be last.
49 #include "upb/port/def.inc"
50
// Synthetic field-type values used by the decoder's tables.  Judging by their
// names they stand for "field not found" and "MessageSet item".
enum {
  kUpb_FakeFieldType_FieldNotFound = 0,
  kUpb_FakeFieldType_MessageSetItem = 19,
};
56
// DecodeOp: the action the decoder performs for a given wire-type/field-type
// combination.  Members are listed in ascending numeric order.
enum {
  // Special ops: nothing is written to a regular field for these.
  kUpb_DecodeOp_MessageSetItem = -2,
  kUpb_DecodeOp_UnknownField = -1,

  // Scalar-only ops.  The 1/4/8-byte ops double as log2 of the value size
  // (the array code shifts by the op directly).
  kUpb_DecodeOp_Scalar1Byte = 0,
  kUpb_DecodeOp_Enum = 1,
  kUpb_DecodeOp_Scalar4Byte = 2,
  kUpb_DecodeOp_Scalar8Byte = 3,

  // Ops valid for both scalar and repeated fields.
  kUpb_DecodeOp_String = 4,
  kUpb_DecodeOp_Bytes = 5,
  kUpb_DecodeOp_SubMessage = 6,

  // Repeated-only ops.  Values 7..12 are produced by the OP_FIXPCK_LG2() and
  // OP_VARPCK_LG2() macros.
  kUpb_DecodeOp_PackedEnum = 13,
};
77
// For packed fields it is helpful to be able to recover the lg2 of the data
// size from the op: subtracting OP_FIXPCK_LG2(0) / OP_VARPCK_LG2(0) from the op
// yields lg2 again.
//
// The argument is parenthesized so that an expression argument (for example
// `1 << 1`) is evaluated before the offset is added.
#define OP_FIXPCK_LG2(n) ((n) + 5) /* n in [2, 3] => op in [7, 8] */
#define OP_VARPCK_LG2(n) ((n) + 9) /* n in [0, 2, 3] => op in [9, 11, 12] */
82
// Scratch storage for a single decoded wire value.  All members overlap, so
// the decoder can read a raw 64-bit value and then reinterpret it in place.
typedef union {
  bool bool_val;        // Written for bool fields.
  uint32_t uint32_val;  // Written for 32-bit fields.
  uint64_t uint64_val;  // Raw varint values are decoded into this member.
  uint32_t size;        // Byte length used for strings, messages, packed data.
} wireval;
89
90 static const char* _upb_Decoder_DecodeMessage(upb_Decoder* d, const char* ptr,
91 upb_Message* msg,
92 const upb_MiniTable* layout);
93
_upb_Decoder_ErrorJmp(upb_Decoder * d,upb_DecodeStatus status)94 UPB_NORETURN static void* _upb_Decoder_ErrorJmp(upb_Decoder* d,
95 upb_DecodeStatus status) {
96 assert(status != kUpb_DecodeStatus_Ok);
97 d->status = status;
98 UPB_LONGJMP(d->err, 1);
99 }
100
// Externally visible error exit that takes the status as a plain `int`
// (presumably for the fast-table decoder, going by the name).  It records the
// status and jumps to d->err; `status` must not be kUpb_DecodeStatus_Ok.
const char* _upb_FastDecoder_ErrorJmp(upb_Decoder* d, int status) {
  assert(status != kUpb_DecodeStatus_Ok);
  d->status = status;
  UPB_LONGJMP(d->err, 1);
  // Placed after the jump; only here to satisfy the declared return type.
  return NULL;
}
107
// Checks the `len` bytes at `buf` with _upb_Decoder_VerifyUtf8Inline() and
// aborts the decode with kUpb_DecodeStatus_BadUtf8 when the check fails.
static void _upb_Decoder_VerifyUtf8(upb_Decoder* d, const char* buf, int len) {
  if (_upb_Decoder_VerifyUtf8Inline(buf, len)) return;
  _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8);
}
113
// Makes sure `arr` has room for `elem` more elements beyond its current size.
//
// Returns true when the array had to be reallocated (callers must then
// recompute any pointers into its storage) and false when the spare capacity
// was already sufficient.  A failed reallocation aborts the decode with
// kUpb_DecodeStatus_OutOfMemory.
static bool _upb_Decoder_Reserve(upb_Decoder* d, upb_Array* arr, size_t elem) {
  if (arr->capacity - arr->size >= elem) return false;

  if (!_upb_array_realloc(arr, arr->size + elem, &d->arena)) {
    _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
  }
  return true;
}
121
// Result of the multi-byte varint slow path.
typedef struct {
  const char* ptr;  // One past the last varint byte, or NULL on failure.
  uint64_t val;     // Decoded value (0 on failure).
} _upb_DecodeLongVarintReturn;
126
127 UPB_NOINLINE
_upb_Decoder_DecodeLongVarint(const char * ptr,uint64_t val)128 static _upb_DecodeLongVarintReturn _upb_Decoder_DecodeLongVarint(
129 const char* ptr, uint64_t val) {
130 _upb_DecodeLongVarintReturn ret = {NULL, 0};
131 uint64_t byte;
132 int i;
133 for (i = 1; i < 10; i++) {
134 byte = (uint8_t)ptr[i];
135 val += (byte - 1) << (i * 7);
136 if (!(byte & 0x80)) {
137 ret.ptr = ptr + i + 1;
138 ret.val = val;
139 return ret;
140 }
141 }
142 return ret;
143 }
144
145 UPB_FORCEINLINE
_upb_Decoder_DecodeVarint(upb_Decoder * d,const char * ptr,uint64_t * val)146 static const char* _upb_Decoder_DecodeVarint(upb_Decoder* d, const char* ptr,
147 uint64_t* val) {
148 uint64_t byte = (uint8_t)*ptr;
149 if (UPB_LIKELY((byte & 0x80) == 0)) {
150 *val = byte;
151 return ptr + 1;
152 } else {
153 _upb_DecodeLongVarintReturn res = _upb_Decoder_DecodeLongVarint(ptr, byte);
154 if (!res.ptr) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
155 *val = res.val;
156 return res.ptr;
157 }
158 }
159
160 UPB_FORCEINLINE
_upb_Decoder_DecodeTag(upb_Decoder * d,const char * ptr,uint32_t * val)161 static const char* _upb_Decoder_DecodeTag(upb_Decoder* d, const char* ptr,
162 uint32_t* val) {
163 uint64_t byte = (uint8_t)*ptr;
164 if (UPB_LIKELY((byte & 0x80) == 0)) {
165 *val = byte;
166 return ptr + 1;
167 } else {
168 const char* start = ptr;
169 _upb_DecodeLongVarintReturn res = _upb_Decoder_DecodeLongVarint(ptr, byte);
170 if (!res.ptr || res.ptr - start > 5 || res.val > UINT32_MAX) {
171 _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
172 }
173 *val = res.val;
174 return res.ptr;
175 }
176 }
177
178 UPB_FORCEINLINE
upb_Decoder_DecodeSize(upb_Decoder * d,const char * ptr,uint32_t * size)179 static const char* upb_Decoder_DecodeSize(upb_Decoder* d, const char* ptr,
180 uint32_t* size) {
181 uint64_t size64;
182 ptr = _upb_Decoder_DecodeVarint(d, ptr, &size64);
183 if (size64 >= INT32_MAX ||
184 !upb_EpsCopyInputStream_CheckSize(&d->input, ptr, (int)size64)) {
185 _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
186 }
187 *size = size64;
188 return ptr;
189 }
190
// Prepares a 32-bit value for the later memcpy(dst, &val, 4).
//
// Nothing is done on little-endian hosts.  On other hosts the value is stored
// into the uint32_val member explicitly, so that the first four bytes of the
// union hold it.
static void _upb_Decoder_MungeInt32(wireval* val) {
  if (_upb_IsLittleEndian()) return;
  val->uint32_val = val->uint64_val;
}
197
// Converts a raw varint held in `val->uint64_val` into the in-memory
// representation for descriptor type `type`:
//   - bool:                normalized to 0/1 in bool_val
//   - sint32 / sint64:     zigzag-decoded
//   - int32/uint32/enum:   narrowed via _upb_Decoder_MungeInt32()
// All other types are left untouched.
static void _upb_Decoder_Munge(int type, wireval* val) {
  switch (type) {
    case kUpb_FieldType_Bool:
      val->bool_val = val->uint64_val != 0;
      break;

    case kUpb_FieldType_SInt32: {
      const uint32_t zigzag = val->uint64_val;
      val->uint32_val = (zigzag >> 1) ^ -(int32_t)(zigzag & 1);
      break;
    }

    case kUpb_FieldType_SInt64: {
      const uint64_t zigzag = val->uint64_val;
      val->uint64_val = (zigzag >> 1) ^ -(int64_t)(zigzag & 1);
      break;
    }

    case kUpb_FieldType_Int32:
    case kUpb_FieldType_UInt32:
    case kUpb_FieldType_Enum:
      _upb_Decoder_MungeInt32(val);
      break;

    default:
      break;
  }
}
220
_upb_Decoder_NewSubMessage(upb_Decoder * d,const upb_MiniTableSub * subs,const upb_MiniTableField * field)221 static upb_Message* _upb_Decoder_NewSubMessage(
222 upb_Decoder* d, const upb_MiniTableSub* subs,
223 const upb_MiniTableField* field) {
224 const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg;
225 UPB_ASSERT(subl);
226 upb_Message* msg = _upb_Message_New(subl, &d->arena);
227 if (!msg) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
228 return msg;
229 }
230
// Reads `size` bytes of string data at `ptr` via
// upb_EpsCopyInputStream_ReadString() and stores the resulting data pointer
// and length in `*str`.  Returns the input position after the string.  A NULL
// result from the stream aborts the decode with kUpb_DecodeStatus_OutOfMemory.
static const char* _upb_Decoder_ReadString(upb_Decoder* d, const char* ptr,
                                           int size, upb_StringView* str) {
  const char* data = ptr;
  const char* next =
      upb_EpsCopyInputStream_ReadString(&d->input, &data, size, &d->arena);
  if (next == NULL) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);

  str->data = data;
  str->size = size;
  return next;
}
240
241 UPB_FORCEINLINE
_upb_Decoder_RecurseSubMessage(upb_Decoder * d,const char * ptr,upb_Message * submsg,const upb_MiniTable * subl,uint32_t expected_end_group)242 static const char* _upb_Decoder_RecurseSubMessage(upb_Decoder* d,
243 const char* ptr,
244 upb_Message* submsg,
245 const upb_MiniTable* subl,
246 uint32_t expected_end_group) {
247 if (--d->depth < 0) {
248 _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_MaxDepthExceeded);
249 }
250 ptr = _upb_Decoder_DecodeMessage(d, ptr, submsg, subl);
251 d->depth++;
252 if (d->end_group != expected_end_group) {
253 _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
254 }
255 return ptr;
256 }
257
258 UPB_FORCEINLINE
_upb_Decoder_DecodeSubMessage(upb_Decoder * d,const char * ptr,upb_Message * submsg,const upb_MiniTableSub * subs,const upb_MiniTableField * field,int size)259 static const char* _upb_Decoder_DecodeSubMessage(
260 upb_Decoder* d, const char* ptr, upb_Message* submsg,
261 const upb_MiniTableSub* subs, const upb_MiniTableField* field, int size) {
262 int saved_delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, size);
263 const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg;
264 UPB_ASSERT(subl);
265 ptr = _upb_Decoder_RecurseSubMessage(d, ptr, submsg, subl, DECODE_NOGROUP);
266 upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_delta);
267 return ptr;
268 }
269
270 UPB_FORCEINLINE
_upb_Decoder_DecodeGroup(upb_Decoder * d,const char * ptr,upb_Message * submsg,const upb_MiniTable * subl,uint32_t number)271 static const char* _upb_Decoder_DecodeGroup(upb_Decoder* d, const char* ptr,
272 upb_Message* submsg,
273 const upb_MiniTable* subl,
274 uint32_t number) {
275 if (_upb_Decoder_IsDone(d, &ptr)) {
276 _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
277 }
278 ptr = _upb_Decoder_RecurseSubMessage(d, ptr, submsg, subl, number);
279 d->end_group = DECODE_NOGROUP;
280 return ptr;
281 }
282
283 UPB_FORCEINLINE
_upb_Decoder_DecodeUnknownGroup(upb_Decoder * d,const char * ptr,uint32_t number)284 static const char* _upb_Decoder_DecodeUnknownGroup(upb_Decoder* d,
285 const char* ptr,
286 uint32_t number) {
287 return _upb_Decoder_DecodeGroup(d, ptr, NULL, NULL, number);
288 }
289
290 UPB_FORCEINLINE
_upb_Decoder_DecodeKnownGroup(upb_Decoder * d,const char * ptr,upb_Message * submsg,const upb_MiniTableSub * subs,const upb_MiniTableField * field)291 static const char* _upb_Decoder_DecodeKnownGroup(
292 upb_Decoder* d, const char* ptr, upb_Message* submsg,
293 const upb_MiniTableSub* subs, const upb_MiniTableField* field) {
294 const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg;
295 UPB_ASSERT(subl);
296 return _upb_Decoder_DecodeGroup(d, ptr, submsg, subl, field->number);
297 }
298
// Writes `val` at `ptr` as a base-128 varint (7 payload bits per byte, high
// bit set on every byte except the last) and returns the position just past
// the last byte written.  At most 5 bytes are produced for a 32-bit value.
static char* upb_Decoder_EncodeVarint32(uint32_t val, char* ptr) {
  // Emit all bytes that need a continuation bit...
  while (val >= 0x80U) {
    *ptr++ = (char)(uint8_t)((val & 0x7fU) | 0x80U);
    val >>= 7;
  }
  // ...then the final byte (also covers val == 0).
  *ptr++ = (char)(uint8_t)val;
  return ptr;
}
308
// Appends two back-to-back varints (`val1` then `val2`) to the unknown-field
// data of `msg`.  Failure to append aborts the decode with
// kUpb_DecodeStatus_OutOfMemory.
static void _upb_Decoder_AddUnknownVarints(upb_Decoder* d, upb_Message* msg,
                                           uint32_t val1, uint32_t val2) {
  // Two 32-bit varints need at most 2 * 5 bytes, so 20 is ample.
  char buf[20];
  char* end = upb_Decoder_EncodeVarint32(val1, buf);
  end = upb_Decoder_EncodeVarint32(val2, end);

  if (_upb_Message_AddUnknown(msg, buf, end - buf, &d->arena)) return;
  _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
}
320
321 UPB_NOINLINE
_upb_Decoder_CheckEnumSlow(upb_Decoder * d,const char * ptr,upb_Message * msg,const upb_MiniTableEnum * e,const upb_MiniTableField * field,uint32_t v)322 static bool _upb_Decoder_CheckEnumSlow(upb_Decoder* d, const char* ptr,
323 upb_Message* msg,
324 const upb_MiniTableEnum* e,
325 const upb_MiniTableField* field,
326 uint32_t v) {
327 if (_upb_MiniTable_CheckEnumValueSlow(e, v)) return true;
328
329 // Unrecognized enum goes into unknown fields.
330 // For packed fields the tag could be arbitrarily far in the past, so we
331 // just re-encode the tag and value here.
332 uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Varint;
333 upb_Message* unknown_msg =
334 field->mode & kUpb_LabelFlags_IsExtension ? d->unknown_msg : msg;
335 _upb_Decoder_AddUnknownVarints(d, unknown_msg, tag, v);
336 return false;
337 }
338
339 UPB_FORCEINLINE
_upb_Decoder_CheckEnum(upb_Decoder * d,const char * ptr,upb_Message * msg,const upb_MiniTableEnum * e,const upb_MiniTableField * field,wireval * val)340 static bool _upb_Decoder_CheckEnum(upb_Decoder* d, const char* ptr,
341 upb_Message* msg, const upb_MiniTableEnum* e,
342 const upb_MiniTableField* field,
343 wireval* val) {
344 uint32_t v = val->uint32_val;
345
346 _kUpb_FastEnumCheck_Status status = _upb_MiniTable_CheckEnumValueFast(e, v);
347 if (UPB_LIKELY(status == _kUpb_FastEnumCheck_ValueIsInEnum)) return true;
348 return _upb_Decoder_CheckEnumSlow(d, ptr, msg, e, field, v);
349 }
350
351 UPB_NOINLINE
_upb_Decoder_DecodeEnumArray(upb_Decoder * d,const char * ptr,upb_Message * msg,upb_Array * arr,const upb_MiniTableSub * subs,const upb_MiniTableField * field,wireval * val)352 static const char* _upb_Decoder_DecodeEnumArray(upb_Decoder* d, const char* ptr,
353 upb_Message* msg,
354 upb_Array* arr,
355 const upb_MiniTableSub* subs,
356 const upb_MiniTableField* field,
357 wireval* val) {
358 const upb_MiniTableEnum* e = subs[field->UPB_PRIVATE(submsg_index)].subenum;
359 if (!_upb_Decoder_CheckEnum(d, ptr, msg, e, field, val)) return ptr;
360 void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void);
361 arr->size++;
362 memcpy(mem, val, 4);
363 return ptr;
364 }
365
366 UPB_FORCEINLINE
_upb_Decoder_DecodeFixedPacked(upb_Decoder * d,const char * ptr,upb_Array * arr,wireval * val,const upb_MiniTableField * field,int lg2)367 static const char* _upb_Decoder_DecodeFixedPacked(
368 upb_Decoder* d, const char* ptr, upb_Array* arr, wireval* val,
369 const upb_MiniTableField* field, int lg2) {
370 int mask = (1 << lg2) - 1;
371 size_t count = val->size >> lg2;
372 if ((val->size & mask) != 0) {
373 // Length isn't a round multiple of elem size.
374 _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
375 }
376 _upb_Decoder_Reserve(d, arr, count);
377 void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void);
378 arr->size += count;
379 // Note: if/when the decoder supports multi-buffer input, we will need to
380 // handle buffer seams here.
381 if (_upb_IsLittleEndian()) {
382 ptr = upb_EpsCopyInputStream_Copy(&d->input, ptr, mem, val->size);
383 } else {
384 int delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size);
385 char* dst = mem;
386 while (!_upb_Decoder_IsDone(d, &ptr)) {
387 if (lg2 == 2) {
388 ptr = upb_WireReader_ReadFixed32(ptr, dst);
389 dst += 4;
390 } else {
391 UPB_ASSERT(lg2 == 3);
392 ptr = upb_WireReader_ReadFixed64(ptr, dst);
393 dst += 8;
394 }
395 }
396 upb_EpsCopyInputStream_PopLimit(&d->input, ptr, delta);
397 }
398
399 return ptr;
400 }
401
402 UPB_FORCEINLINE
_upb_Decoder_DecodeVarintPacked(upb_Decoder * d,const char * ptr,upb_Array * arr,wireval * val,const upb_MiniTableField * field,int lg2)403 static const char* _upb_Decoder_DecodeVarintPacked(
404 upb_Decoder* d, const char* ptr, upb_Array* arr, wireval* val,
405 const upb_MiniTableField* field, int lg2) {
406 int scale = 1 << lg2;
407 int saved_limit = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size);
408 char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void);
409 while (!_upb_Decoder_IsDone(d, &ptr)) {
410 wireval elem;
411 ptr = _upb_Decoder_DecodeVarint(d, ptr, &elem.uint64_val);
412 _upb_Decoder_Munge(field->UPB_PRIVATE(descriptortype), &elem);
413 if (_upb_Decoder_Reserve(d, arr, 1)) {
414 out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void);
415 }
416 arr->size++;
417 memcpy(out, &elem, scale);
418 out += scale;
419 }
420 upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_limit);
421 return ptr;
422 }
423
424 UPB_NOINLINE
_upb_Decoder_DecodeEnumPacked(upb_Decoder * d,const char * ptr,upb_Message * msg,upb_Array * arr,const upb_MiniTableSub * subs,const upb_MiniTableField * field,wireval * val)425 static const char* _upb_Decoder_DecodeEnumPacked(
426 upb_Decoder* d, const char* ptr, upb_Message* msg, upb_Array* arr,
427 const upb_MiniTableSub* subs, const upb_MiniTableField* field,
428 wireval* val) {
429 const upb_MiniTableEnum* e = subs[field->UPB_PRIVATE(submsg_index)].subenum;
430 int saved_limit = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size);
431 char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void);
432 while (!_upb_Decoder_IsDone(d, &ptr)) {
433 wireval elem;
434 ptr = _upb_Decoder_DecodeVarint(d, ptr, &elem.uint64_val);
435 _upb_Decoder_MungeInt32(&elem);
436 if (!_upb_Decoder_CheckEnum(d, ptr, msg, e, field, &elem)) {
437 continue;
438 }
439 if (_upb_Decoder_Reserve(d, arr, 1)) {
440 out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void);
441 }
442 arr->size++;
443 memcpy(out, &elem, 4);
444 out += 4;
445 }
446 upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_limit);
447 return ptr;
448 }
449
_upb_Decoder_CreateArray(upb_Decoder * d,const upb_MiniTableField * field)450 upb_Array* _upb_Decoder_CreateArray(upb_Decoder* d,
451 const upb_MiniTableField* field) {
452 /* Maps descriptor type -> elem_size_lg2. */
453 static const uint8_t kElemSizeLg2[] = {
454 [0] = -1, // invalid descriptor type
455 [kUpb_FieldType_Double] = 3,
456 [kUpb_FieldType_Float] = 2,
457 [kUpb_FieldType_Int64] = 3,
458 [kUpb_FieldType_UInt64] = 3,
459 [kUpb_FieldType_Int32] = 2,
460 [kUpb_FieldType_Fixed64] = 3,
461 [kUpb_FieldType_Fixed32] = 2,
462 [kUpb_FieldType_Bool] = 0,
463 [kUpb_FieldType_String] = UPB_SIZE(3, 4),
464 [kUpb_FieldType_Group] = UPB_SIZE(2, 3),
465 [kUpb_FieldType_Message] = UPB_SIZE(2, 3),
466 [kUpb_FieldType_Bytes] = UPB_SIZE(3, 4),
467 [kUpb_FieldType_UInt32] = 2,
468 [kUpb_FieldType_Enum] = 2,
469 [kUpb_FieldType_SFixed32] = 2,
470 [kUpb_FieldType_SFixed64] = 3,
471 [kUpb_FieldType_SInt32] = 2,
472 [kUpb_FieldType_SInt64] = 3,
473 };
474
475 size_t lg2 = kElemSizeLg2[field->UPB_PRIVATE(descriptortype)];
476 upb_Array* ret = _upb_Array_New(&d->arena, 4, lg2);
477 if (!ret) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
478 return ret;
479 }
480
// Decodes one wire value (or one packed run) into the repeated field `field`
// of `msg`, creating the array on first use.  `op` selects how `val` and the
// bytes at `ptr` are interpreted.  Returns the input position after whatever
// was consumed.
static const char* _upb_Decoder_DecodeToArray(upb_Decoder* d, const char* ptr,
                                              upb_Message* msg,
                                              const upb_MiniTableSub* subs,
                                              const upb_MiniTableField* field,
                                              wireval* val, int op) {
  upb_Array** slot = UPB_PTR_AT(msg, field->offset, void);
  upb_Array* arr = *slot;

  if (arr == NULL) {
    arr = _upb_Decoder_CreateArray(d, field);
    *slot = arr;
  } else {
    // Guarantee room for the single-element cases below.
    _upb_Decoder_Reserve(d, arr, 1);
  }

  switch (op) {
    case kUpb_DecodeOp_Scalar1Byte:
    case kUpb_DecodeOp_Scalar4Byte:
    case kUpb_DecodeOp_Scalar8Byte: {
      // For these ops the op value itself is log2 of the element size.
      void* dst = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << op, void);
      arr->size++;
      memcpy(dst, val, 1 << op);
      return ptr;
    }

    case kUpb_DecodeOp_String:
      _upb_Decoder_VerifyUtf8(d, ptr, val->size);
      /* Fallthrough. */
    case kUpb_DecodeOp_Bytes: {
      upb_StringView* views = (upb_StringView*)_upb_array_ptr(arr);
      upb_StringView* dst = views + arr->size;
      arr->size++;
      return _upb_Decoder_ReadString(d, ptr, val->size, dst);
    }

    case kUpb_DecodeOp_SubMessage: {
      // The new element is linked into the array before it is decoded.
      upb_Message* child = _upb_Decoder_NewSubMessage(d, subs, field);
      *UPB_PTR_AT(_upb_array_ptr(arr), arr->size * sizeof(void*),
                  upb_Message*) = child;
      arr->size++;
      if (UPB_UNLIKELY(field->UPB_PRIVATE(descriptortype) ==
                       kUpb_FieldType_Group)) {
        return _upb_Decoder_DecodeKnownGroup(d, ptr, child, subs, field);
      }
      return _upb_Decoder_DecodeSubMessage(d, ptr, child, subs, field,
                                           val->size);
    }

    case OP_FIXPCK_LG2(2):
    case OP_FIXPCK_LG2(3): {
      const int lg2 = op - OP_FIXPCK_LG2(0);
      return _upb_Decoder_DecodeFixedPacked(d, ptr, arr, val, field, lg2);
    }

    case OP_VARPCK_LG2(0):
    case OP_VARPCK_LG2(2):
    case OP_VARPCK_LG2(3): {
      const int lg2 = op - OP_VARPCK_LG2(0);
      return _upb_Decoder_DecodeVarintPacked(d, ptr, arr, val, field, lg2);
    }

    case kUpb_DecodeOp_Enum:
      return _upb_Decoder_DecodeEnumArray(d, ptr, msg, arr, subs, field, val);

    case kUpb_DecodeOp_PackedEnum:
      return _upb_Decoder_DecodeEnumPacked(d, ptr, msg, arr, subs, field, val);

    default:
      UPB_UNREACHABLE();
  }
}
546
_upb_Decoder_CreateMap(upb_Decoder * d,const upb_MiniTable * entry)547 upb_Map* _upb_Decoder_CreateMap(upb_Decoder* d, const upb_MiniTable* entry) {
548 /* Maps descriptor type -> upb map size. */
549 static const uint8_t kSizeInMap[] = {
550 [0] = -1, // invalid descriptor type */
551 [kUpb_FieldType_Double] = 8,
552 [kUpb_FieldType_Float] = 4,
553 [kUpb_FieldType_Int64] = 8,
554 [kUpb_FieldType_UInt64] = 8,
555 [kUpb_FieldType_Int32] = 4,
556 [kUpb_FieldType_Fixed64] = 8,
557 [kUpb_FieldType_Fixed32] = 4,
558 [kUpb_FieldType_Bool] = 1,
559 [kUpb_FieldType_String] = UPB_MAPTYPE_STRING,
560 [kUpb_FieldType_Group] = sizeof(void*),
561 [kUpb_FieldType_Message] = sizeof(void*),
562 [kUpb_FieldType_Bytes] = UPB_MAPTYPE_STRING,
563 [kUpb_FieldType_UInt32] = 4,
564 [kUpb_FieldType_Enum] = 4,
565 [kUpb_FieldType_SFixed32] = 4,
566 [kUpb_FieldType_SFixed64] = 8,
567 [kUpb_FieldType_SInt32] = 4,
568 [kUpb_FieldType_SInt64] = 8,
569 };
570
571 const upb_MiniTableField* key_field = &entry->fields[0];
572 const upb_MiniTableField* val_field = &entry->fields[1];
573 char key_size = kSizeInMap[key_field->UPB_PRIVATE(descriptortype)];
574 char val_size = kSizeInMap[val_field->UPB_PRIVATE(descriptortype)];
575 UPB_ASSERT(key_field->offset == offsetof(upb_MapEntryData, k));
576 UPB_ASSERT(val_field->offset == offsetof(upb_MapEntryData, v));
577 upb_Map* ret = _upb_Map_New(&d->arena, key_size, val_size);
578 if (!ret) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
579 return ret;
580 }
581
// Decodes one length-delimited map entry (`val->size` bytes at `ptr`) for map
// field `field` of `msg`, creating the map on first use.
//
// The entry is parsed into a temporary upb_MapEntry.  If parsing left unknown
// fields on the entry, the whole entry is re-encoded and appended to the
// unknown fields of `msg` (tag, length, payload) instead of being inserted;
// otherwise the key/value pair is inserted into the map.
//
// Any allocation or re-encoding failure aborts the decode with
// kUpb_DecodeStatus_OutOfMemory.  Returns the input position after the entry.
static const char* _upb_Decoder_DecodeToMap(upb_Decoder* d, const char* ptr,
                                            upb_Message* msg,
                                            const upb_MiniTableSub* subs,
                                            const upb_MiniTableField* field,
                                            wireval* val) {
  upb_Map** map_p = UPB_PTR_AT(msg, field->offset, upb_Map*);
  upb_Map* map = *map_p;
  upb_MapEntry ent;
  UPB_ASSERT(upb_MiniTableField_Type(field) == kUpb_FieldType_Message);
  const upb_MiniTable* entry = subs[field->UPB_PRIVATE(submsg_index)].submsg;

  UPB_ASSERT(entry->field_count == 2);
  UPB_ASSERT(!upb_IsRepeatedOrMap(&entry->fields[0]));
  UPB_ASSERT(!upb_IsRepeatedOrMap(&entry->fields[1]));

  if (!map) {
    map = _upb_Decoder_CreateMap(d, entry);
    *map_p = map;
  }

  // Parse map entry.
  memset(&ent, 0, sizeof(ent));

  if (entry->fields[1].UPB_PRIVATE(descriptortype) == kUpb_FieldType_Message ||
      entry->fields[1].UPB_PRIVATE(descriptortype) == kUpb_FieldType_Group) {
    const upb_MiniTable* submsg_table = entry->subs[0].submsg;
    // Any sub-message entry must be linked.  We do not allow dynamic tree
    // shaking in this case.
    UPB_ASSERT(submsg_table);

    // Create proactively to handle the case where it doesn't appear.  The
    // allocation is checked so that an out-of-memory condition is reported
    // instead of a NULL message value ending up in the map.
    upb_Message* value_msg = _upb_Message_New(submsg_table, &d->arena);
    if (!value_msg) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
    ent.data.v.val = upb_value_ptr(value_msg);
  }

  ptr =
      _upb_Decoder_DecodeSubMessage(d, ptr, &ent.data, subs, field, val->size);

  // Check if the entry had any unknown fields.
  size_t unknown_size;
  upb_Message_GetUnknown(&ent.data, &unknown_size);
  if (unknown_size != 0) {
    char* buf;
    size_t encoded_size;
    uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Delimited;
    upb_EncodeStatus status =
        upb_Encode(&ent.data, entry, 0, &d->arena, &buf, &encoded_size);
    if (status != kUpb_EncodeStatus_Ok) {
      _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
    }
    _upb_Decoder_AddUnknownVarints(d, msg, tag, encoded_size);
    if (!_upb_Message_AddUnknown(msg, buf, encoded_size, &d->arena)) {
      _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
    }
  } else {
    if (_upb_Map_Insert(map, &ent.data.k, map->key_size, &ent.data.v,
                        map->val_size,
                        &d->arena) == kUpb_MapInsertStatus_OutOfMemory) {
      _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
    }
  }
  return ptr;
}
643
// Decodes one wire value into the non-repeated field `field` of `msg`.
//
// Steps, in order:
//   1. For enum ops the value is validated first; a rejected value leaves the
//      field and its presence untouched.
//   2. Presence is recorded: a hasbit when field->presence > 0, or the oneof
//      case when field->presence < 0.
//   3. The value is stored according to `op`.
// Returns the input position after whatever was consumed.
static const char* _upb_Decoder_DecodeToSubMessage(
    upb_Decoder* d, const char* ptr, upb_Message* msg,
    const upb_MiniTableSub* subs, const upb_MiniTableField* field, wireval* val,
    int op) {
  void* dst = UPB_PTR_AT(msg, field->offset, void);
  const int desc_type = field->UPB_PRIVATE(descriptortype);

  if (UPB_UNLIKELY(op == kUpb_DecodeOp_Enum)) {
    const upb_MiniTableEnum* enum_table =
        subs[field->UPB_PRIVATE(submsg_index)].subenum;
    if (!_upb_Decoder_CheckEnum(d, ptr, msg, enum_table, field, val)) {
      return ptr;
    }
  }

  /* Set presence if necessary. */
  if (field->presence > 0) {
    _upb_sethas_field(msg, field);
  } else if (field->presence < 0) {
    /* Oneof case */
    uint32_t* active_case = _upb_oneofcase_field(msg, field);
    // Switching to a sub-message member from a different member: clear the
    // shared slot so it is not mistaken for an existing sub-message pointer.
    if (op == kUpb_DecodeOp_SubMessage && *active_case != field->number) {
      memset(dst, 0, sizeof(void*));
    }
    *active_case = field->number;
  }

  /* Store into message. */
  switch (op) {
    case kUpb_DecodeOp_SubMessage: {
      upb_Message** child_slot = dst;
      upb_Message* child = *child_slot;
      if (child == NULL) {
        child = _upb_Decoder_NewSubMessage(d, subs, field);
        *child_slot = child;
      }
      if (UPB_UNLIKELY(desc_type == kUpb_FieldType_Group)) {
        return _upb_Decoder_DecodeKnownGroup(d, ptr, child, subs, field);
      }
      return _upb_Decoder_DecodeSubMessage(d, ptr, child, subs, field,
                                           val->size);
    }

    case kUpb_DecodeOp_String:
      _upb_Decoder_VerifyUtf8(d, ptr, val->size);
      /* Fallthrough. */
    case kUpb_DecodeOp_Bytes:
      return _upb_Decoder_ReadString(d, ptr, val->size, dst);

    case kUpb_DecodeOp_Scalar8Byte:
      memcpy(dst, val, 8);
      return ptr;

    case kUpb_DecodeOp_Enum:
    case kUpb_DecodeOp_Scalar4Byte:
      memcpy(dst, val, 4);
      return ptr;

    case kUpb_DecodeOp_Scalar1Byte:
      memcpy(dst, val, 1);
      return ptr;

    default:
      UPB_UNREACHABLE();
  }
}
708
709 UPB_NOINLINE
_upb_Decoder_CheckRequired(upb_Decoder * d,const char * ptr,const upb_Message * msg,const upb_MiniTable * l)710 const char* _upb_Decoder_CheckRequired(upb_Decoder* d, const char* ptr,
711 const upb_Message* msg,
712 const upb_MiniTable* l) {
713 assert(l->required_count);
714 if (UPB_LIKELY((d->options & kUpb_DecodeOption_CheckRequired) == 0)) {
715 return ptr;
716 }
717 uint64_t msg_head;
718 memcpy(&msg_head, msg, 8);
719 msg_head = _upb_BigEndian_Swap64(msg_head);
720 if (upb_MiniTable_requiredmask(l) & ~msg_head) {
721 d->missing_required = true;
722 }
723 return ptr;
724 }
725
726 UPB_FORCEINLINE
_upb_Decoder_TryFastDispatch(upb_Decoder * d,const char ** ptr,upb_Message * msg,const upb_MiniTable * layout)727 static bool _upb_Decoder_TryFastDispatch(upb_Decoder* d, const char** ptr,
728 upb_Message* msg,
729 const upb_MiniTable* layout) {
730 #if UPB_FASTTABLE
731 if (layout && layout->table_mask != (unsigned char)-1) {
732 uint16_t tag = _upb_FastDecoder_LoadTag(*ptr);
733 intptr_t table = decode_totable(layout);
734 *ptr = _upb_FastDecoder_TagDispatch(d, *ptr, msg, table, 0, tag);
735 return true;
736 }
737 #endif
738 return false;
739 }
740
// Skips the value of a field whose tag (`tag`) has already been consumed and
// returns the position after it.  The low 3 bits of the tag select the wire
// type; a start-group is skipped by decoding it as an unknown group.  Any
// other wire type aborts the decode with kUpb_DecodeStatus_Malformed.
static const char* upb_Decoder_SkipField(upb_Decoder* d, const char* ptr,
                                         uint32_t tag) {
  const int wire_type = tag & 7;
  const int field_number = tag >> 3;

  switch (wire_type) {
    case kUpb_WireType_Varint: {
      uint64_t ignored;
      return _upb_Decoder_DecodeVarint(d, ptr, &ignored);
    }

    case kUpb_WireType_64Bit:
      return ptr + 8;

    case kUpb_WireType_32Bit:
      return ptr + 4;

    case kUpb_WireType_Delimited: {
      uint32_t payload_len;
      const char* payload = upb_Decoder_DecodeSize(d, ptr, &payload_len);
      return payload + payload_len;
    }

    case kUpb_WireType_StartGroup:
      return _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number);

    default:
      _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
  }
}
765
766 enum {
767 kStartItemTag = ((kUpb_MsgSet_Item << 3) | kUpb_WireType_StartGroup),
768 kEndItemTag = ((kUpb_MsgSet_Item << 3) | kUpb_WireType_EndGroup),
769 kTypeIdTag = ((kUpb_MsgSet_TypeId << 3) | kUpb_WireType_Varint),
770 kMessageTag = ((kUpb_MsgSet_Message << 3) | kUpb_WireType_Delimited),
771 };
772
// Stores a MessageSet item whose extension (`item_mt`) is registered.
//
// The extension slot on `msg` is fetched or created, a fresh sub-message is
// allocated, and the `size` payload bytes at `data` are decoded into it with a
// nested upb_Decode() that reuses the decoder's extension registry, options
// and arena.  The sub-message pointer is written into the extension before the
// nested status is inspected; a non-OK status is then propagated.
static void upb_Decoder_AddKnownMessageSetItem(
    upb_Decoder* d, upb_Message* msg, const upb_MiniTableExtension* item_mt,
    const char* data, uint32_t size) {
  upb_Message_Extension* ext =
      _upb_Message_GetOrCreateExtension(msg, item_mt, &d->arena);
  if (UPB_UNLIKELY(ext == NULL)) {
    _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
  }

  upb_Message* payload =
      _upb_Decoder_NewSubMessage(d, &ext->ext->sub, &ext->ext->field);
  const upb_DecodeStatus nested =
      upb_Decode(data, size, payload, item_mt->sub.submsg, d->extreg,
                 d->options, &d->arena);
  memcpy(&ext->data, &payload, sizeof(payload));

  if (nested != kUpb_DecodeStatus_Ok) _upb_Decoder_ErrorJmp(d, nested);
}
788
// Stores a MessageSet item with no registered extension as unknown-field data
// on `msg`.  The item is re-serialized as:
//   start-item tag, type-id tag, type_id, message tag, message_size,
//   <payload bytes>, end-item tag
// The header and trailer are built in a local buffer; the payload is appended
// straight from `message_data`.  Failure to append aborts the decode with
// kUpb_DecodeStatus_OutOfMemory.
static void upb_Decoder_AddUnknownMessageSetItem(upb_Decoder* d,
                                                 upb_Message* msg,
                                                 uint32_t type_id,
                                                 const char* message_data,
                                                 uint32_t message_size) {
  char buf[60];

  // Header: everything that precedes the payload bytes.
  char* cur = buf;
  cur = upb_Decoder_EncodeVarint32(kStartItemTag, cur);
  cur = upb_Decoder_EncodeVarint32(kTypeIdTag, cur);
  cur = upb_Decoder_EncodeVarint32(type_id, cur);
  cur = upb_Decoder_EncodeVarint32(kMessageTag, cur);
  cur = upb_Decoder_EncodeVarint32(message_size, cur);
  char* const header_end = cur;

  // Trailer: the end-group tag that follows the payload.
  char* const trailer_end = upb_Decoder_EncodeVarint32(kEndItemTag, cur);

  const bool stored =
      _upb_Message_AddUnknown(msg, buf, header_end - buf, &d->arena) &&
      _upb_Message_AddUnknown(msg, message_data, message_size, &d->arena) &&
      _upb_Message_AddUnknown(msg, header_end, trailer_end - header_end,
                              &d->arena);
  if (!stored) {
    _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
  }
}
812
// Stores one complete MessageSet item (type id plus payload).  The type id is
// looked up in the decoder's extension registry for mini-table `t`.  If no
// extension is found the item is kept as unknown-field data; otherwise it is
// decoded into the matching extension.
static void upb_Decoder_AddMessageSetItem(upb_Decoder* d, upb_Message* msg,
                                          const upb_MiniTable* t,
                                          uint32_t type_id, const char* data,
                                          uint32_t size) {
  const upb_MiniTableExtension* known =
      upb_ExtensionRegistry_Lookup(d->extreg, t, type_id);
  if (known == NULL) {
    upb_Decoder_AddUnknownMessageSetItem(d, msg, type_id, data, size);
    return;
  }
  upb_Decoder_AddKnownMessageSetItem(d, msg, known, data, size);
}
825
/* Decodes the body of a MessageSet item, starting right after the item's
 * start tag, and returns the position just past the end-item tag.
 *
 * The type id and the payload may arrive in either order:
 *   - id first, payload second: the payload is added as soon as it is read;
 *   - payload first: its location is remembered and it is added once the id
 *     shows up.
 * Only the first id and the first payload are used; later duplicates are
 * parsed and dropped.  Any other field inside the item is skipped.
 *
 * If the input ends before an end-item tag is seen, the function leaves via
 * _upb_Decoder_ErrorJmp() with kUpb_DecodeStatus_Malformed. */
static const char* upb_Decoder_DecodeMessageSetItem(
    upb_Decoder* d, const char* ptr, upb_Message* msg,
    const upb_MiniTable* layout) {
  enum {
    kSeenId = 1 << 0,
    kSeenPayload = 1 << 1,
  };
  unsigned seen = 0;
  uint32_t type_id = 0;
  upb_StringView pending = {NULL, 0};  // Payload that arrived before the id.

  while (!_upb_Decoder_IsDone(d, &ptr)) {
    uint32_t tag;
    ptr = _upb_Decoder_DecodeTag(d, ptr, &tag);

    if (tag == kEndItemTag) return ptr;

    if (tag == kTypeIdTag) {
      uint64_t raw_id;
      ptr = _upb_Decoder_DecodeVarint(d, ptr, &raw_id);
      if (seen & kSeenId) continue;  // Duplicate id: ignore.
      seen |= kSeenId;
      type_id = raw_id;
      if (seen & kSeenPayload) {
        // The payload came first; now that the id is known, store it.
        upb_Decoder_AddMessageSetItem(d, msg, layout, type_id, pending.data,
                                      pending.size);
      }
    } else if (tag == kMessageTag) {
      uint32_t size;
      ptr = upb_Decoder_DecodeSize(d, ptr, &size);
      const char* payload = ptr;
      ptr += size;
      if (seen & kSeenPayload) continue;  // Duplicate payload: ignore.
      seen |= kSeenPayload;
      if (seen & kSeenId) {
        upb_Decoder_AddMessageSetItem(d, msg, layout, type_id, payload, size);
      } else {
        // Out of order: keep a reference to the payload until the id arrives.
        pending.data = payload;
        pending.size = size;
      }
    } else {
      // Unexpected fields inside a message set item are not preserved.
      ptr = upb_Decoder_SkipField(d, ptr, tag);
    }
  }

  _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
}
878
_upb_Decoder_FindField(upb_Decoder * d,const upb_MiniTable * t,uint32_t field_number,int * last_field_index)879 static const upb_MiniTableField* _upb_Decoder_FindField(upb_Decoder* d,
880 const upb_MiniTable* t,
881 uint32_t field_number,
882 int* last_field_index) {
883 static upb_MiniTableField none = {
884 0, 0, 0, 0, kUpb_FakeFieldType_FieldNotFound, 0};
885 if (t == NULL) return &none;
886
887 size_t idx = ((size_t)field_number) - 1; // 0 wraps to SIZE_MAX
888 if (idx < t->dense_below) {
889 /* Fastest case: index into dense fields. */
890 goto found;
891 }
892
893 if (t->dense_below < t->field_count) {
894 /* Linear search non-dense fields. Resume scanning from last_field_index
895 * since fields are usually in order. */
896 size_t last = *last_field_index;
897 for (idx = last; idx < t->field_count; idx++) {
898 if (t->fields[idx].number == field_number) {
899 goto found;
900 }
901 }
902
903 for (idx = t->dense_below; idx < last; idx++) {
904 if (t->fields[idx].number == field_number) {
905 goto found;
906 }
907 }
908 }
909
910 if (d->extreg) {
911 switch (t->ext) {
912 case kUpb_ExtMode_Extendable: {
913 const upb_MiniTableExtension* ext =
914 upb_ExtensionRegistry_Lookup(d->extreg, t, field_number);
915 if (ext) return &ext->field;
916 break;
917 }
918 case kUpb_ExtMode_IsMessageSet:
919 if (field_number == kUpb_MsgSet_Item) {
920 static upb_MiniTableField item = {
921 0, 0, 0, 0, kUpb_FakeFieldType_MessageSetItem, 0};
922 return &item;
923 }
924 break;
925 }
926 }
927
928 return &none; /* Unknown field. */
929
930 found:
931 UPB_ASSERT(t->fields[idx].number == field_number);
932 *last_field_index = idx;
933 return &t->fields[idx];
934 }
935
_upb_Decoder_GetVarintOp(const upb_MiniTableField * field)936 int _upb_Decoder_GetVarintOp(const upb_MiniTableField* field) {
937 static const int8_t kVarintOps[] = {
938 [kUpb_FakeFieldType_FieldNotFound] = kUpb_DecodeOp_UnknownField,
939 [kUpb_FieldType_Double] = kUpb_DecodeOp_UnknownField,
940 [kUpb_FieldType_Float] = kUpb_DecodeOp_UnknownField,
941 [kUpb_FieldType_Int64] = kUpb_DecodeOp_Scalar8Byte,
942 [kUpb_FieldType_UInt64] = kUpb_DecodeOp_Scalar8Byte,
943 [kUpb_FieldType_Int32] = kUpb_DecodeOp_Scalar4Byte,
944 [kUpb_FieldType_Fixed64] = kUpb_DecodeOp_UnknownField,
945 [kUpb_FieldType_Fixed32] = kUpb_DecodeOp_UnknownField,
946 [kUpb_FieldType_Bool] = kUpb_DecodeOp_Scalar1Byte,
947 [kUpb_FieldType_String] = kUpb_DecodeOp_UnknownField,
948 [kUpb_FieldType_Group] = kUpb_DecodeOp_UnknownField,
949 [kUpb_FieldType_Message] = kUpb_DecodeOp_UnknownField,
950 [kUpb_FieldType_Bytes] = kUpb_DecodeOp_UnknownField,
951 [kUpb_FieldType_UInt32] = kUpb_DecodeOp_Scalar4Byte,
952 [kUpb_FieldType_Enum] = kUpb_DecodeOp_Enum,
953 [kUpb_FieldType_SFixed32] = kUpb_DecodeOp_UnknownField,
954 [kUpb_FieldType_SFixed64] = kUpb_DecodeOp_UnknownField,
955 [kUpb_FieldType_SInt32] = kUpb_DecodeOp_Scalar4Byte,
956 [kUpb_FieldType_SInt64] = kUpb_DecodeOp_Scalar8Byte,
957 [kUpb_FakeFieldType_MessageSetItem] = kUpb_DecodeOp_UnknownField,
958 };
959
960 return kVarintOps[field->UPB_PRIVATE(descriptortype)];
961 }
962
963 UPB_FORCEINLINE
_upb_Decoder_CheckUnlinked(const upb_MiniTable * mt,const upb_MiniTableField * field,int * op)964 static void _upb_Decoder_CheckUnlinked(const upb_MiniTable* mt,
965 const upb_MiniTableField* field,
966 int* op) {
967 // If sub-message is not linked, treat as unknown.
968 if (field->mode & kUpb_LabelFlags_IsExtension) return;
969 const upb_MiniTableSub* sub = &mt->subs[field->UPB_PRIVATE(submsg_index)];
970 if (sub->submsg) return;
971 #ifndef NDEBUG
972 const upb_MiniTableField* oneof = upb_MiniTable_GetOneof(mt, field);
973 if (oneof) {
974 // All other members of the oneof must be message fields that are also
975 // unlinked.
976 do {
977 assert(upb_MiniTableField_CType(oneof) == kUpb_CType_Message);
978 const upb_MiniTableSub* oneof_sub =
979 &mt->subs[oneof->UPB_PRIVATE(submsg_index)];
980 assert(!oneof_sub);
981 } while (upb_MiniTable_NextOneofField(mt, &oneof));
982 }
983 #endif // NDEBUG
984 *op = kUpb_DecodeOp_UnknownField;
985 }
986
_upb_Decoder_GetDelimitedOp(const upb_MiniTable * mt,const upb_MiniTableField * field)987 int _upb_Decoder_GetDelimitedOp(const upb_MiniTable* mt,
988 const upb_MiniTableField* field) {
989 enum { kRepeatedBase = 19 };
990
991 static const int8_t kDelimitedOps[] = {
992 /* For non-repeated field type. */
993 [kUpb_FakeFieldType_FieldNotFound] =
994 kUpb_DecodeOp_UnknownField, // Field not found.
995 [kUpb_FieldType_Double] = kUpb_DecodeOp_UnknownField,
996 [kUpb_FieldType_Float] = kUpb_DecodeOp_UnknownField,
997 [kUpb_FieldType_Int64] = kUpb_DecodeOp_UnknownField,
998 [kUpb_FieldType_UInt64] = kUpb_DecodeOp_UnknownField,
999 [kUpb_FieldType_Int32] = kUpb_DecodeOp_UnknownField,
1000 [kUpb_FieldType_Fixed64] = kUpb_DecodeOp_UnknownField,
1001 [kUpb_FieldType_Fixed32] = kUpb_DecodeOp_UnknownField,
1002 [kUpb_FieldType_Bool] = kUpb_DecodeOp_UnknownField,
1003 [kUpb_FieldType_String] = kUpb_DecodeOp_String,
1004 [kUpb_FieldType_Group] = kUpb_DecodeOp_UnknownField,
1005 [kUpb_FieldType_Message] = kUpb_DecodeOp_SubMessage,
1006 [kUpb_FieldType_Bytes] = kUpb_DecodeOp_Bytes,
1007 [kUpb_FieldType_UInt32] = kUpb_DecodeOp_UnknownField,
1008 [kUpb_FieldType_Enum] = kUpb_DecodeOp_UnknownField,
1009 [kUpb_FieldType_SFixed32] = kUpb_DecodeOp_UnknownField,
1010 [kUpb_FieldType_SFixed64] = kUpb_DecodeOp_UnknownField,
1011 [kUpb_FieldType_SInt32] = kUpb_DecodeOp_UnknownField,
1012 [kUpb_FieldType_SInt64] = kUpb_DecodeOp_UnknownField,
1013 [kUpb_FakeFieldType_MessageSetItem] = kUpb_DecodeOp_UnknownField,
1014 // For repeated field type. */
1015 [kRepeatedBase + kUpb_FieldType_Double] = OP_FIXPCK_LG2(3),
1016 [kRepeatedBase + kUpb_FieldType_Float] = OP_FIXPCK_LG2(2),
1017 [kRepeatedBase + kUpb_FieldType_Int64] = OP_VARPCK_LG2(3),
1018 [kRepeatedBase + kUpb_FieldType_UInt64] = OP_VARPCK_LG2(3),
1019 [kRepeatedBase + kUpb_FieldType_Int32] = OP_VARPCK_LG2(2),
1020 [kRepeatedBase + kUpb_FieldType_Fixed64] = OP_FIXPCK_LG2(3),
1021 [kRepeatedBase + kUpb_FieldType_Fixed32] = OP_FIXPCK_LG2(2),
1022 [kRepeatedBase + kUpb_FieldType_Bool] = OP_VARPCK_LG2(0),
1023 [kRepeatedBase + kUpb_FieldType_String] = kUpb_DecodeOp_String,
1024 [kRepeatedBase + kUpb_FieldType_Group] = kUpb_DecodeOp_SubMessage,
1025 [kRepeatedBase + kUpb_FieldType_Message] = kUpb_DecodeOp_SubMessage,
1026 [kRepeatedBase + kUpb_FieldType_Bytes] = kUpb_DecodeOp_Bytes,
1027 [kRepeatedBase + kUpb_FieldType_UInt32] = OP_VARPCK_LG2(2),
1028 [kRepeatedBase + kUpb_FieldType_Enum] = kUpb_DecodeOp_PackedEnum,
1029 [kRepeatedBase + kUpb_FieldType_SFixed32] = OP_FIXPCK_LG2(2),
1030 [kRepeatedBase + kUpb_FieldType_SFixed64] = OP_FIXPCK_LG2(3),
1031 [kRepeatedBase + kUpb_FieldType_SInt32] = OP_VARPCK_LG2(2),
1032 [kRepeatedBase + kUpb_FieldType_SInt64] = OP_VARPCK_LG2(3),
1033 // Omitting kUpb_FakeFieldType_MessageSetItem, because we never emit a
1034 // repeated msgset type
1035 };
1036
1037 int ndx = field->UPB_PRIVATE(descriptortype);
1038 if (upb_FieldMode_Get(field) == kUpb_FieldMode_Array) ndx += kRepeatedBase;
1039 int op = kDelimitedOps[ndx];
1040
1041 if (op == kUpb_DecodeOp_SubMessage) {
1042 _upb_Decoder_CheckUnlinked(mt, field, &op);
1043 }
1044
1045 return op;
1046 }
1047
1048 UPB_FORCEINLINE
_upb_Decoder_DecodeWireValue(upb_Decoder * d,const char * ptr,const upb_MiniTable * mt,const upb_MiniTableField * field,int wire_type,wireval * val,int * op)1049 static const char* _upb_Decoder_DecodeWireValue(upb_Decoder* d, const char* ptr,
1050 const upb_MiniTable* mt,
1051 const upb_MiniTableField* field,
1052 int wire_type, wireval* val,
1053 int* op) {
1054 static const unsigned kFixed32OkMask = (1 << kUpb_FieldType_Float) |
1055 (1 << kUpb_FieldType_Fixed32) |
1056 (1 << kUpb_FieldType_SFixed32);
1057
1058 static const unsigned kFixed64OkMask = (1 << kUpb_FieldType_Double) |
1059 (1 << kUpb_FieldType_Fixed64) |
1060 (1 << kUpb_FieldType_SFixed64);
1061
1062 switch (wire_type) {
1063 case kUpb_WireType_Varint:
1064 ptr = _upb_Decoder_DecodeVarint(d, ptr, &val->uint64_val);
1065 *op = _upb_Decoder_GetVarintOp(field);
1066 _upb_Decoder_Munge(field->UPB_PRIVATE(descriptortype), val);
1067 return ptr;
1068 case kUpb_WireType_32Bit:
1069 *op = kUpb_DecodeOp_Scalar4Byte;
1070 if (((1 << field->UPB_PRIVATE(descriptortype)) & kFixed32OkMask) == 0) {
1071 *op = kUpb_DecodeOp_UnknownField;
1072 }
1073 return upb_WireReader_ReadFixed32(ptr, &val->uint32_val);
1074 case kUpb_WireType_64Bit:
1075 *op = kUpb_DecodeOp_Scalar8Byte;
1076 if (((1 << field->UPB_PRIVATE(descriptortype)) & kFixed64OkMask) == 0) {
1077 *op = kUpb_DecodeOp_UnknownField;
1078 }
1079 return upb_WireReader_ReadFixed64(ptr, &val->uint64_val);
1080 case kUpb_WireType_Delimited:
1081 ptr = upb_Decoder_DecodeSize(d, ptr, &val->size);
1082 *op = _upb_Decoder_GetDelimitedOp(mt, field);
1083 return ptr;
1084 case kUpb_WireType_StartGroup:
1085 val->uint32_val = field->number;
1086 if (field->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Group) {
1087 *op = kUpb_DecodeOp_SubMessage;
1088 _upb_Decoder_CheckUnlinked(mt, field, op);
1089 } else if (field->UPB_PRIVATE(descriptortype) ==
1090 kUpb_FakeFieldType_MessageSetItem) {
1091 *op = kUpb_DecodeOp_MessageSetItem;
1092 } else {
1093 *op = kUpb_DecodeOp_UnknownField;
1094 }
1095 return ptr;
1096 default:
1097 break;
1098 }
1099 _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
1100 }
1101
1102 UPB_FORCEINLINE
_upb_Decoder_DecodeKnownField(upb_Decoder * d,const char * ptr,upb_Message * msg,const upb_MiniTable * layout,const upb_MiniTableField * field,int op,wireval * val)1103 static const char* _upb_Decoder_DecodeKnownField(
1104 upb_Decoder* d, const char* ptr, upb_Message* msg,
1105 const upb_MiniTable* layout, const upb_MiniTableField* field, int op,
1106 wireval* val) {
1107 const upb_MiniTableSub* subs = layout->subs;
1108 uint8_t mode = field->mode;
1109
1110 if (UPB_UNLIKELY(mode & kUpb_LabelFlags_IsExtension)) {
1111 const upb_MiniTableExtension* ext_layout =
1112 (const upb_MiniTableExtension*)field;
1113 upb_Message_Extension* ext =
1114 _upb_Message_GetOrCreateExtension(msg, ext_layout, &d->arena);
1115 if (UPB_UNLIKELY(!ext)) {
1116 _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
1117 }
1118 d->unknown_msg = msg;
1119 msg = &ext->data;
1120 subs = &ext->ext->sub;
1121 }
1122
1123 switch (mode & kUpb_FieldMode_Mask) {
1124 case kUpb_FieldMode_Array:
1125 return _upb_Decoder_DecodeToArray(d, ptr, msg, subs, field, val, op);
1126 case kUpb_FieldMode_Map:
1127 return _upb_Decoder_DecodeToMap(d, ptr, msg, subs, field, val);
1128 case kUpb_FieldMode_Scalar:
1129 return _upb_Decoder_DecodeToSubMessage(d, ptr, msg, subs, field, val, op);
1130 default:
1131 UPB_UNREACHABLE();
1132 }
1133 }
1134
/* Walks backwards from `ptr` over a varint whose decoded value is `val` and
 * returns a pointer to the varint's first byte.
 *
 * Bytes are consumed one at a time going backwards; the low 7 bits of each
 * byte are appended below the bits accumulated so far, and the walk stops as
 * soon as the accumulated value equals `val`.  At least one byte is always
 * consumed.  The caller must guarantee that a varint encoding `val` really
 * ends immediately before `ptr`; the loop has no other termination check. */
static const char* _upb_Decoder_ReverseSkipVarint(const char* ptr,
                                                  uint32_t val) {
  uint32_t acc = 0;
  for (;;) {
    --ptr;
    acc = (acc << 7) | (uint32_t)(*ptr & 0x7f);
    if (acc == val) return ptr;
  }
}
1145
/* Handles a field that is not decoded into a known slot.
 *
 * On entry `ptr` is the position returned by the wire-value decoder.  For
 * delimited fields the payload (val.size bytes) is skipped here; for groups
 * the group body is consumed via _upb_Decoder_DecodeUnknownGroup().
 *
 * If `msg` is non-NULL the complete field, tag included, is handed to
 * _upb_Message_AddUnknown().  The start of the field is not passed in; it is
 * recovered by walking backwards through the input from `ptr`.
 *
 * Errors (via _upb_Decoder_ErrorJmp):
 *   - kUpb_DecodeStatus_Malformed   if field_number is 0;
 *   - kUpb_DecodeStatus_OutOfMemory if adding the unknown data fails.
 *
 * Returns the input position after the field. */
static const char* _upb_Decoder_DecodeUnknownField(upb_Decoder* d,
                                                   const char* ptr,
                                                   upb_Message* msg,
                                                   int field_number,
                                                   int wire_type, wireval val) {
  if (field_number == 0) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);

  // Since unknown fields are the uncommon case, we do a little extra work here
  // to walk backwards through the buffer to find the field start.  This frees
  // up a register in the fast paths (when the field is known), which leads to
  // significant speedups in benchmarks.
  const char* field_start = ptr;

  if (wire_type == kUpb_WireType_Delimited) ptr += val.size;

  if (!msg) {
    // Nothing to preserve; only a group still needs its body consumed.
    if (wire_type == kUpb_WireType_StartGroup) {
      ptr = _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number);
    }
    return ptr;
  }

  // Step 1: back up over the value (or length prefix) to where it starts.
  switch (wire_type) {
    case kUpb_WireType_Varint:
    case kUpb_WireType_Delimited:
      // Step back one byte, then keep going while the preceding byte has its
      // continuation bit set.
      do {
        field_start--;
      } while (field_start[-1] & 0x80);
      break;
    case kUpb_WireType_32Bit:
      field_start -= 4;
      break;
    case kUpb_WireType_64Bit:
      field_start -= 8;
      break;
    default:
      break;
  }
  assert(field_start == d->debug_valstart);

  // Step 2: back up over the tag itself.
  uint32_t tag = ((uint32_t)field_number << 3) | wire_type;
  field_start = _upb_Decoder_ReverseSkipVarint(field_start, tag);
  assert(field_start == d->debug_tagstart);

  if (wire_type == kUpb_WireType_StartGroup) {
    // Publish the start position on the decoder while the group body is
    // parsed, then read it back in case it was changed in the meantime.
    d->unknown = field_start;
    d->unknown_msg = msg;
    ptr = _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number);
    field_start = d->unknown;
    d->unknown = NULL;
  }

  if (!_upb_Message_AddUnknown(msg, field_start, ptr - field_start,
                               &d->arena)) {
    _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
  }
  return ptr;
}
1197
1198 UPB_NOINLINE
_upb_Decoder_DecodeMessage(upb_Decoder * d,const char * ptr,upb_Message * msg,const upb_MiniTable * layout)1199 static const char* _upb_Decoder_DecodeMessage(upb_Decoder* d, const char* ptr,
1200 upb_Message* msg,
1201 const upb_MiniTable* layout) {
1202 int last_field_index = 0;
1203
1204 #if UPB_FASTTABLE
1205 // The first time we want to skip fast dispatch, because we may have just been
1206 // invoked by the fast parser to handle a case that it bailed on.
1207 if (!_upb_Decoder_IsDone(d, &ptr)) goto nofast;
1208 #endif
1209
1210 while (!_upb_Decoder_IsDone(d, &ptr)) {
1211 uint32_t tag;
1212 const upb_MiniTableField* field;
1213 int field_number;
1214 int wire_type;
1215 wireval val;
1216 int op;
1217
1218 if (_upb_Decoder_TryFastDispatch(d, &ptr, msg, layout)) break;
1219
1220 #if UPB_FASTTABLE
1221 nofast:
1222 #endif
1223
1224 #ifndef NDEBUG
1225 d->debug_tagstart = ptr;
1226 #endif
1227
1228 UPB_ASSERT(ptr < d->input.limit_ptr);
1229 ptr = _upb_Decoder_DecodeTag(d, ptr, &tag);
1230 field_number = tag >> 3;
1231 wire_type = tag & 7;
1232
1233 #ifndef NDEBUG
1234 d->debug_valstart = ptr;
1235 #endif
1236
1237 if (wire_type == kUpb_WireType_EndGroup) {
1238 d->end_group = field_number;
1239 return ptr;
1240 }
1241
1242 field = _upb_Decoder_FindField(d, layout, field_number, &last_field_index);
1243 ptr = _upb_Decoder_DecodeWireValue(d, ptr, layout, field, wire_type, &val,
1244 &op);
1245
1246 if (op >= 0) {
1247 ptr = _upb_Decoder_DecodeKnownField(d, ptr, msg, layout, field, op, &val);
1248 } else {
1249 switch (op) {
1250 case kUpb_DecodeOp_UnknownField:
1251 ptr = _upb_Decoder_DecodeUnknownField(d, ptr, msg, field_number,
1252 wire_type, val);
1253 break;
1254 case kUpb_DecodeOp_MessageSetItem:
1255 ptr = upb_Decoder_DecodeMessageSetItem(d, ptr, msg, layout);
1256 break;
1257 }
1258 }
1259 }
1260
1261 return UPB_UNLIKELY(layout && layout->required_count)
1262 ? _upb_Decoder_CheckRequired(d, ptr, msg, layout)
1263 : ptr;
1264 }
1265
/* Entry point used to hand control to the generic decoder.
 *
 * ORs `hasbits` into the first 32-bit word of `msg`, then continues decoding
 * with _upb_Decoder_DecodeMessage() using the mini-table recovered from
 * `table` via decode_totablep().  `data` is unused. */
const char* _upb_FastDecoder_DecodeGeneric(struct upb_Decoder* d,
                                           const char* ptr, upb_Message* msg,
                                           intptr_t table, uint64_t hasbits,
                                           uint64_t data) {
  uint32_t* first_word = (uint32_t*)msg;
  *first_word |= hasbits;
  (void)data;
  return _upb_Decoder_DecodeMessage(d, ptr, msg, decode_totablep(table));
}
1274
_upb_Decoder_DecodeTop(struct upb_Decoder * d,const char * buf,void * msg,const upb_MiniTable * l)1275 static upb_DecodeStatus _upb_Decoder_DecodeTop(struct upb_Decoder* d,
1276 const char* buf, void* msg,
1277 const upb_MiniTable* l) {
1278 if (!_upb_Decoder_TryFastDispatch(d, &buf, msg, l)) {
1279 _upb_Decoder_DecodeMessage(d, buf, msg, l);
1280 }
1281 if (d->end_group != DECODE_NOGROUP) return kUpb_DecodeStatus_Malformed;
1282 if (d->missing_required) return kUpb_DecodeStatus_MissingRequired;
1283 return kUpb_DecodeStatus_Ok;
1284 }
1285
1286 UPB_NOINLINE
_upb_Decoder_IsDoneFallback(upb_EpsCopyInputStream * e,const char * ptr,int overrun)1287 const char* _upb_Decoder_IsDoneFallback(upb_EpsCopyInputStream* e,
1288 const char* ptr, int overrun) {
1289 return _upb_EpsCopyInputStream_IsDoneFallbackInline(
1290 e, ptr, overrun, _upb_Decoder_BufferFlipCallback);
1291 }
1292
upb_Decoder_Decode(upb_Decoder * const decoder,const char * const buf,void * const msg,const upb_MiniTable * const l,upb_Arena * const arena)1293 static upb_DecodeStatus upb_Decoder_Decode(upb_Decoder* const decoder,
1294 const char* const buf,
1295 void* const msg,
1296 const upb_MiniTable* const l,
1297 upb_Arena* const arena) {
1298 if (UPB_SETJMP(decoder->err) == 0) {
1299 decoder->status = _upb_Decoder_DecodeTop(decoder, buf, msg, l);
1300 } else {
1301 UPB_ASSERT(decoder->status != kUpb_DecodeStatus_Ok);
1302 }
1303
1304 _upb_MemBlock* blocks =
1305 upb_Atomic_Load(&decoder->arena.blocks, memory_order_relaxed);
1306 arena->head = decoder->arena.head;
1307 upb_Atomic_Store(&arena->blocks, blocks, memory_order_relaxed);
1308 return decoder->status;
1309 }
1310
upb_Decode(const char * buf,size_t size,void * msg,const upb_MiniTable * l,const upb_ExtensionRegistry * extreg,int options,upb_Arena * arena)1311 upb_DecodeStatus upb_Decode(const char* buf, size_t size, void* msg,
1312 const upb_MiniTable* l,
1313 const upb_ExtensionRegistry* extreg, int options,
1314 upb_Arena* arena) {
1315 upb_Decoder decoder;
1316 unsigned depth = (unsigned)options >> 16;
1317
1318 upb_EpsCopyInputStream_Init(&decoder.input, &buf, size,
1319 options & kUpb_DecodeOption_AliasString);
1320
1321 decoder.extreg = extreg;
1322 decoder.unknown = NULL;
1323 decoder.depth = depth ? depth : kUpb_WireFormat_DefaultDepthLimit;
1324 decoder.end_group = DECODE_NOGROUP;
1325 decoder.options = (uint16_t)options;
1326 decoder.missing_required = false;
1327 decoder.status = kUpb_DecodeStatus_Ok;
1328
1329 // Violating the encapsulation of the arena for performance reasons.
1330 // This is a temporary arena that we swap into and swap out of when we are
1331 // done. The temporary arena only needs to be able to handle allocation,
1332 // not fuse or free, so it does not need many of the members to be initialized
1333 // (particularly parent_or_count).
1334 _upb_MemBlock* blocks = upb_Atomic_Load(&arena->blocks, memory_order_relaxed);
1335 decoder.arena.head = arena->head;
1336 decoder.arena.block_alloc = arena->block_alloc;
1337 upb_Atomic_Init(&decoder.arena.blocks, blocks);
1338
1339 return upb_Decoder_Decode(&decoder, buf, msg, l, arena);
1340 }
1341
1342 #undef OP_FIXPCK_LG2
1343 #undef OP_VARPCK_LG2
1344