xref: /aosp_15_r20/external/grpc-grpc/third_party/upb/upb/mini_descriptor/decode.c (revision cc02d7e222339f7a4f6ba5f422e6413f4bd931f2)
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2023 Google LLC.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 #include "upb/mini_descriptor/decode.h"
9 
10 #include <inttypes.h>
11 #include <stddef.h>
12 #include <stdint.h>
13 #include <stdlib.h>
14 
15 #include "upb/base/descriptor_constants.h"
16 #include "upb/base/internal/log2.h"
17 #include "upb/base/status.h"
18 #include "upb/base/string_view.h"
19 #include "upb/mem/arena.h"
20 #include "upb/message/internal/map_entry.h"
21 #include "upb/message/internal/types.h"
22 #include "upb/mini_descriptor/internal/base92.h"
23 #include "upb/mini_descriptor/internal/decoder.h"
24 #include "upb/mini_descriptor/internal/modifiers.h"
25 #include "upb/mini_descriptor/internal/wire_constants.h"
26 #include "upb/mini_table/extension.h"
27 #include "upb/mini_table/field.h"
28 #include "upb/mini_table/internal/field.h"
29 #include "upb/mini_table/internal/message.h"
30 #include "upb/mini_table/message.h"
31 #include "upb/mini_table/sub.h"
32 
33 // Must be last.
34 #include "upb/port/def.inc"
35 
// The bytes occupied by struct upb_Message itself are reserved: no field
// hasbit is ever assigned inside them.
#define kUpb_Reserved_Hasbytes sizeof(struct upb_Message)

// The reserved region measured in bits.  This is also the index of the first
// hasbit handed out to a field (64 when struct upb_Message is 8 bytes).
#define kUpb_Reserved_Hasbits (8 * kUpb_Reserved_Hasbytes)
41 
// Kind of storage a layout item describes.
//
// Note: the numeric value takes part in the sort key used when calculating
// layout order, so the ordering of these enumerators matters.
typedef enum {
  kUpb_LayoutItemType_OneofCase = 0,   // Oneof case.
  kUpb_LayoutItemType_OneofField = 1,  // Oneof field data.
  kUpb_LayoutItemType_Field = 2,       // Non-oneof field data.

  // Largest valid value.
  kUpb_LayoutItemType_Max = kUpb_LayoutItemType_Field,
} upb_LayoutItemType;
50 
// "No field" marker.  It is the initial field_index of a oneof item before any
// member has been added, and it terminates a oneof's linked list of fields.
#define kUpb_LayoutItem_IndexSentinel ((uint16_t)-1)

// One unit of message storage that needs an offset: a regular field, the
// shared data slot of a oneof, or the case slot of a oneof.
typedef struct {
  // Index of the corresponding field.  When this is a oneof field, the field's
  // offset will be the index of the next field in a linked list.
  uint16_t field_index;
  // Byte offset assigned to this item by upb_MtDecoder_AssignOffsets().
  uint16_t offset;
  // Storage representation; determines the item's size and alignment.
  upb_FieldRep rep;
  // Which kind of storage this item describes.
  upb_LayoutItemType type;
} upb_LayoutItem;
61 
// Growable array of layout items.  The buffer is managed with realloc() by
// upb_MtDecoder_PushItem().
typedef struct {
  upb_LayoutItem* data;  // Heap buffer holding the items.
  size_t size;           // Number of items currently stored.
  size_t capacity;       // Number of items `data` has room for.
} upb_LayoutItemVector;
67 
// State carried through the decoding of one mini descriptor.
typedef struct {
  // Shared decoder state; provides `end` and is what the upb_MdDecoder_*
  // error helpers operate on.
  upb_MdDecoder base;
  // Table being built.  The parser treats a NULL table as "decoding an
  // extension".
  upb_MiniTable* table;
  // Field array of the message being built (allocated in
  // upb_MtDecoder_ParseMessage()).
  upb_MiniTableField* fields;
  // Selects between the 32-bit and 64-bit size/alignment tables.
  upb_MiniTablePlatform platform;
  // Layout items collected for offset assignment.
  upb_LayoutItemVector vec;
  // Arena that owns the field and sub arrays.
  upb_Arena* arena;
} upb_MtDecoder;
76 
// While decoding, each field's offset temporarily stores a presence
// classifier instead of a real offset:
enum PresenceClass {
  kNoPresence,        // No hasbit (repeated or proto3-singular field).
  kHasbitPresence,    // Gets an ordinary hasbit.
  kRequiredPresence,  // Required field; gets one of the lowest hasbits.
  // Values >= kOneofBase mark the field as a oneof member.  The stored value
  // is kOneofBase plus the index of the next field in that oneof's linked
  // list, or kUpb_LayoutItem_IndexSentinel for the last field of the list.
  // NOTE(review): an earlier description also mentioned negative values
  // referring to a oneof by number; no code visible here stores one.
  kOneofBase,
};
87 
// Returns true when `field` is in array mode and its descriptor type is one
// that upb_FieldType_IsPackable() accepts.
static bool upb_MtDecoder_FieldIsPackable(upb_MiniTableField* field) {
  if (!(field->UPB_PRIVATE(mode) & kUpb_FieldMode_Array)) return false;
  return upb_FieldType_IsPackable(field->UPB_PRIVATE(descriptortype));
}
92 
// Running totals of the fields that need an entry in the table's subs array.
typedef struct {
  uint16_t submsg_count;   // Message and group fields seen so far.
  uint16_t subenum_count;  // Enum fields (kUpb_FieldType_Enum) seen so far.
} upb_SubCounts;
97 
// Stores the final descriptor type of `field`, applies the message-level
// packed default, and assigns the field's sub index.
//
//   field           field being initialized; its mode must already hold the
//                   scalar/array bit.
//   type            descriptor type decoded for the field.
//   sub_counts      running counters; the matching one is incremented when the
//                   field needs a sub entry.
//   msg_modifiers   message-level modifier bits.
//   is_proto3_enum  true for open enums, which are stored as Int32 with the
//                   IsAlternate flag.
static void upb_MiniTable_SetTypeAndSub(upb_MiniTableField* field,
                                        upb_FieldType type,
                                        upb_SubCounts* sub_counts,
                                        uint64_t msg_modifiers,
                                        bool is_proto3_enum) {
  const bool validate_utf8 =
      (msg_modifiers & kUpb_MessageModifier_ValidateUtf8) != 0;
  const bool packed_by_default =
      (msg_modifiers & kUpb_MessageModifier_DefaultIsPacked) != 0;

  if (is_proto3_enum) {
    UPB_ASSERT(type == kUpb_FieldType_Enum);
    type = kUpb_FieldType_Int32;
    field->UPB_PRIVATE(mode) |= kUpb_LabelFlags_IsAlternate;
  } else if (type == kUpb_FieldType_String && !validate_utf8) {
    // Strings that are not UTF-8 validated are stored as bytes.
    type = kUpb_FieldType_Bytes;
    field->UPB_PRIVATE(mode) |= kUpb_LabelFlags_IsAlternate;
  }

  field->UPB_PRIVATE(descriptortype) = type;

  // Packability depends on the type stored just above.
  if (packed_by_default && upb_MtDecoder_FieldIsPackable(field)) {
    field->UPB_PRIVATE(mode) |= kUpb_LabelFlags_IsPacked;
  }

  switch (type) {
    case kUpb_FieldType_Message:
    case kUpb_FieldType_Group:
      field->UPB_PRIVATE(submsg_index) = sub_counts->submsg_count++;
      break;
    case kUpb_FieldType_Enum:
      // We will need to update this later once we know the total number of
      // submsg fields.
      field->UPB_PRIVATE(submsg_index) = sub_counts->subenum_count++;
      break;
    default:
      field->UPB_PRIVATE(submsg_index) = kUpb_NoSub;
      break;
  }
}
130 
131 static const char kUpb_EncodedToType[] = {
132     [kUpb_EncodedType_Double] = kUpb_FieldType_Double,
133     [kUpb_EncodedType_Float] = kUpb_FieldType_Float,
134     [kUpb_EncodedType_Int64] = kUpb_FieldType_Int64,
135     [kUpb_EncodedType_UInt64] = kUpb_FieldType_UInt64,
136     [kUpb_EncodedType_Int32] = kUpb_FieldType_Int32,
137     [kUpb_EncodedType_Fixed64] = kUpb_FieldType_Fixed64,
138     [kUpb_EncodedType_Fixed32] = kUpb_FieldType_Fixed32,
139     [kUpb_EncodedType_Bool] = kUpb_FieldType_Bool,
140     [kUpb_EncodedType_String] = kUpb_FieldType_String,
141     [kUpb_EncodedType_Group] = kUpb_FieldType_Group,
142     [kUpb_EncodedType_Message] = kUpb_FieldType_Message,
143     [kUpb_EncodedType_Bytes] = kUpb_FieldType_Bytes,
144     [kUpb_EncodedType_UInt32] = kUpb_FieldType_UInt32,
145     [kUpb_EncodedType_OpenEnum] = kUpb_FieldType_Enum,
146     [kUpb_EncodedType_SFixed32] = kUpb_FieldType_SFixed32,
147     [kUpb_EncodedType_SFixed64] = kUpb_FieldType_SFixed64,
148     [kUpb_EncodedType_SInt32] = kUpb_FieldType_SInt32,
149     [kUpb_EncodedType_SInt64] = kUpb_FieldType_SInt64,
150     [kUpb_EncodedType_ClosedEnum] = kUpb_FieldType_Enum,
151 };
152 
// Initializes `field` from the single type character `ch` of a mini
// descriptor: sets mode (scalar/array plus storage rep), a provisional
// presence class in `offset`, and then the type and sub index.
//
// Characters at or above the base92 encoding of kUpb_EncodedType_RepeatedBase
// denote repeated fields.  An unknown type aborts through
// upb_MdDecoder_ErrorJmp().
static void upb_MiniTable_SetField(upb_MtDecoder* d, uint8_t ch,
                                   upb_MiniTableField* field,
                                   uint64_t msg_modifiers,
                                   upb_SubCounts* sub_counts) {
  // Storage rep of scalar fields.  Message and group are absent because they
  // are stored as pointers and handled separately below.
  static const char kUpb_EncodedToFieldRep[] = {
      [kUpb_EncodedType_Bool] = kUpb_FieldRep_1Byte,

      [kUpb_EncodedType_Float] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_Int32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_UInt32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_SInt32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_Fixed32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_SFixed32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_OpenEnum] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_ClosedEnum] = kUpb_FieldRep_4Byte,

      [kUpb_EncodedType_Double] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_Int64] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_UInt64] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_SInt64] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_Fixed64] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_SFixed64] = kUpb_FieldRep_8Byte,

      [kUpb_EncodedType_String] = kUpb_FieldRep_StringView,
      [kUpb_EncodedType_Bytes] = kUpb_FieldRep_StringView,
  };

  // Pointers are 4 bytes on the 32-bit platform and 8 bytes otherwise.
  const char pointer_rep = (d->platform == kUpb_MiniTablePlatform_32Bit)
                               ? kUpb_FieldRep_4Byte
                               : kUpb_FieldRep_8Byte;
  const bool is_repeated = ch >= _upb_ToBase92(kUpb_EncodedType_RepeatedBase);

  int8_t type = _upb_FromBase92(ch);
  if (is_repeated) {
    // Arrays are stored as a pointer and carry no hasbit.
    type -= kUpb_EncodedType_RepeatedBase;
    field->UPB_PRIVATE(mode) =
        kUpb_FieldMode_Array | (pointer_rep << kUpb_FieldRep_Shift);
    field->UPB_PRIVATE(offset) = kNoPresence;
  } else {
    field->UPB_PRIVATE(mode) = kUpb_FieldMode_Scalar;
    field->UPB_PRIVATE(offset) = kHasbitPresence;
    if (type == kUpb_EncodedType_Group || type == kUpb_EncodedType_Message) {
      field->UPB_PRIVATE(mode) |= pointer_rep << kUpb_FieldRep_Shift;
    } else if ((unsigned long)type >= sizeof(kUpb_EncodedToFieldRep)) {
      // A negative `type` becomes a huge unsigned value and lands here too.
      upb_MdDecoder_ErrorJmp(&d->base, "Invalid field type: %d", (int)type);
    } else {
      field->UPB_PRIVATE(mode) |= kUpb_EncodedToFieldRep[type]
                                  << kUpb_FieldRep_Shift;
    }
  }

  if ((unsigned long)type >= sizeof(kUpb_EncodedToType)) {
    upb_MdDecoder_ErrorJmp(&d->base, "Invalid field type: %d", (int)type);
  }

  const bool is_open_enum = (type == kUpb_EncodedType_OpenEnum);
  upb_MiniTable_SetTypeAndSub(field, kUpb_EncodedToType[type], sub_counts,
                              msg_modifiers, is_open_enum);
}
205 
// Applies the encoded per-field modifier bits in `field_modifiers` to `field`.
//
// Handles, in this order: flipping the packed flag, flipping UTF-8 validation
// (bytes-as-alternate back to string), and marking the field proto3-singular
// or required.  Any invalid combination aborts through
// upb_MdDecoder_ErrorJmp().  `message_modifiers` is accepted but not used.
static void upb_MtDecoder_ModifyField(upb_MtDecoder* d,
                                      uint32_t message_modifiers,
                                      uint32_t field_modifiers,
                                      upb_MiniTableField* field) {
  const bool flip_packed =
      (field_modifiers & kUpb_EncodedFieldModifier_FlipPacked) != 0;
  const bool flip_utf8 =
      (field_modifiers & kUpb_EncodedFieldModifier_FlipValidateUtf8) != 0;
  const bool singular =
      (field_modifiers & kUpb_EncodedFieldModifier_IsProto3Singular) != 0;
  const bool required =
      (field_modifiers & kUpb_EncodedFieldModifier_IsRequired) != 0;

  if (flip_packed) {
    if (!upb_MtDecoder_FieldIsPackable(field)) {
      upb_MdDecoder_ErrorJmp(&d->base,
                             "Cannot flip packed on unpackable field %" PRIu32,
                             upb_MiniTableField_Number(field));
    }
    field->UPB_PRIVATE(mode) ^= kUpb_LabelFlags_IsPacked;
  }

  if (flip_utf8) {
    // Only a string that was demoted to "alternate bytes" can be flipped back.
    const bool is_alternate_bytes =
        field->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Bytes &&
        (field->UPB_PRIVATE(mode) & kUpb_LabelFlags_IsAlternate);
    if (!is_alternate_bytes) {
      upb_MdDecoder_ErrorJmp(&d->base,
                             "Cannot flip ValidateUtf8 on field %" PRIu32
                             ", type=%d, mode=%d",
                             upb_MiniTableField_Number(field),
                             (int)field->UPB_PRIVATE(descriptortype),
                             (int)field->UPB_PRIVATE(mode));
    }
    field->UPB_PRIVATE(descriptortype) = kUpb_FieldType_String;
    field->UPB_PRIVATE(mode) &= ~kUpb_LabelFlags_IsAlternate;
  }

  // Validate: both modifiers only make sense on a field that currently has
  // ordinary hasbit presence.
  if ((singular || required) && field->UPB_PRIVATE(offset) != kHasbitPresence) {
    upb_MdDecoder_ErrorJmp(&d->base,
                           "Invalid modifier(s) for repeated field %" PRIu32,
                           upb_MiniTableField_Number(field));
  }
  if (singular && required) {
    upb_MdDecoder_ErrorJmp(
        &d->base, "Field %" PRIu32 " cannot be both singular and required",
        upb_MiniTableField_Number(field));
  }

  if (singular) {
    field->UPB_PRIVATE(offset) = kNoPresence;
  }
  if (required) {
    field->UPB_PRIVATE(offset) = kRequiredPresence;
  }
}
253 
// Appends `item` to the decoder's layout-item vector, doubling the buffer
// (minimum capacity 8) when it is full.
//
// On allocation failure upb_MdDecoder_CheckOutOfMemory() is relied on not to
// return (its definition is not in this file; the code after it dereferences
// the buffer unconditionally).
static void upb_MtDecoder_PushItem(upb_MtDecoder* d, upb_LayoutItem item) {
  if (d->vec.size == d->vec.capacity) {
    size_t new_cap = UPB_MAX(8, d->vec.size * 2);
    // Keep the old pointer in d->vec.data until the new one is known to be
    // valid: realloc() leaves the original block allocated on failure, and
    // overwriting the only reference with NULL would leak it.
    upb_LayoutItem* grown = realloc(d->vec.data, new_cap * sizeof(*grown));
    upb_MdDecoder_CheckOutOfMemory(&d->base, grown);
    d->vec.data = grown;
    d->vec.capacity = new_cap;
  }
  d->vec.data[d->vec.size++] = item;
}
263 
// Finishes one oneof by pushing two layout items for it: the shared data slot
// and the 4-byte case slot.  Aborts with "Empty oneof" if no field was added.
//
// On entry item.field_index holds the head of the oneof's field list biased
// by kOneofBase; both pushed items carry the unbiased field index.
static void upb_MtDecoder_PushOneof(upb_MtDecoder* d, upb_LayoutItem item) {
  if (item.field_index == kUpb_LayoutItem_IndexSentinel) {
    upb_MdDecoder_ErrorJmp(&d->base, "Empty oneof");
  }

  // Oneof data: sized by the largest member (already recorded in item.rep).
  upb_LayoutItem data_item = item;
  data_item.field_index = item.field_index - kOneofBase;
  data_item.type = kUpb_LayoutItemType_OneofField;
  upb_MtDecoder_PushItem(d, data_item);

  // Oneof case: stores a field number.
  upb_LayoutItem case_item = data_item;
  case_item.rep = kUpb_FieldRep_4Byte;
  case_item.type = kUpb_LayoutItemType_OneofCase;
  upb_MtDecoder_PushItem(d, case_item);
}
279 
// Returns the size in bytes of storage representation `rep` on `platform`.
// Only the string-view size differs between the two platforms (8 vs 16).
static size_t upb_MtDecoder_SizeOfRep(upb_FieldRep rep,
                                      upb_MiniTablePlatform platform) {
  static const uint8_t kRepToSize32[] = {
      [kUpb_FieldRep_1Byte] = 1,
      [kUpb_FieldRep_4Byte] = 4,
      [kUpb_FieldRep_8Byte] = 8,
      [kUpb_FieldRep_StringView] = 8,
  };
  static const uint8_t kRepToSize64[] = {
      [kUpb_FieldRep_1Byte] = 1,
      [kUpb_FieldRep_4Byte] = 4,
      [kUpb_FieldRep_8Byte] = 8,
      [kUpb_FieldRep_StringView] = 16,
  };
  // The table for the platform being compiled for must agree with the real
  // upb_StringView.
  UPB_ASSERT(sizeof(upb_StringView) ==
             UPB_SIZE(kRepToSize32, kRepToSize64)[kUpb_FieldRep_StringView]);

  const uint8_t* sizes =
      (platform == kUpb_MiniTablePlatform_32Bit) ? kRepToSize32 : kRepToSize64;
  return sizes[rep];
}
299 
// Returns the alignment in bytes of storage representation `rep` on
// `platform`.  Only the string-view alignment differs (4 vs 8).
static size_t upb_MtDecoder_AlignOfRep(upb_FieldRep rep,
                                       upb_MiniTablePlatform platform) {
  static const uint8_t kRepToAlign32[] = {
      [kUpb_FieldRep_1Byte] = 1,
      [kUpb_FieldRep_4Byte] = 4,
      [kUpb_FieldRep_8Byte] = 8,
      [kUpb_FieldRep_StringView] = 4,
  };
  static const uint8_t kRepToAlign64[] = {
      [kUpb_FieldRep_1Byte] = 1,
      [kUpb_FieldRep_4Byte] = 4,
      [kUpb_FieldRep_8Byte] = 8,
      [kUpb_FieldRep_StringView] = 8,
  };
  // The table for the platform being compiled for must agree with the real
  // upb_StringView.
  UPB_ASSERT(UPB_ALIGN_OF(upb_StringView) ==
             UPB_SIZE(kRepToAlign32, kRepToAlign64)[kUpb_FieldRep_StringView]);

  const uint8_t* aligns = (platform == kUpb_MiniTablePlatform_32Bit)
                              ? kRepToAlign32
                              : kRepToAlign64;
  return aligns[rep];
}
319 
// Decodes one field number from the oneof section and adds that field to the
// oneof currently described by `item`.
//
// The field must exist in the table and still have plain hasbit presence.
// item->rep grows to the largest member seen, and the field is prepended to
// the oneof's linked list (threaded through the fields' offset members).
// Returns the position after the decoded number.
static const char* upb_MtDecoder_DecodeOneofField(upb_MtDecoder* d,
                                                  const char* ptr,
                                                  char first_ch,
                                                  upb_LayoutItem* item) {
  uint32_t field_num;
  ptr = upb_MdDecoder_DecodeBase92Varint(
      &d->base, ptr, first_ch, kUpb_EncodedValue_MinOneofField,
      kUpb_EncodedValue_MaxOneofField, &field_num);

  upb_MiniTableField* member =
      (void*)upb_MiniTable_FindFieldByNumber(d->table, field_num);
  if (member == NULL) {
    upb_MdDecoder_ErrorJmp(&d->base,
                           "Couldn't add field number %" PRIu32
                           " to oneof, no such field number.",
                           field_num);
  }
  if (member->UPB_PRIVATE(offset) != kHasbitPresence) {
    upb_MdDecoder_ErrorJmp(
        &d->base,
        "Cannot add repeated, required, or singular field %" PRIu32
        " to oneof.",
        field_num);
  }

  // Oneof storage must be large enough to accommodate the largest member.
  int member_rep = member->UPB_PRIVATE(mode) >> kUpb_FieldRep_Shift;
  size_t member_size = upb_MtDecoder_SizeOfRep(member_rep, d->platform);
  size_t current_size = upb_MtDecoder_SizeOfRep(item->rep, d->platform);
  if (member_size > current_size) {
    item->rep = member_rep;
  }

  // Prepend this field to the linked list: it points at the old head, and the
  // item now points at it (biased by kOneofBase).
  member->UPB_PRIVATE(offset) = item->field_index;
  item->field_index = (member - d->fields) + kOneofBase;
  return ptr;
}
356 
// Decodes the oneof section, which runs from `ptr` to the end of the input,
// pushing layout items for every oneof.  Returns the final read position.
//
// Note that only field_index is reset between oneofs; item.rep carries over,
// so a later oneof's data slot is never smaller than an earlier one's.
static const char* upb_MtDecoder_DecodeOneofs(upb_MtDecoder* d,
                                              const char* ptr) {
  upb_LayoutItem item = {.rep = 0,
                         .field_index = kUpb_LayoutItem_IndexSentinel};

  while (ptr < d->base.end) {
    const char ch = *ptr++;

    if (ch == kUpb_EncodedValue_FieldSeparator) {
      // Field separator, no action needed.
      continue;
    }

    if (ch == kUpb_EncodedValue_OneofSeparator) {
      // End of oneof: emit it and start collecting the next one.
      upb_MtDecoder_PushOneof(d, item);
      item.field_index = kUpb_LayoutItem_IndexSentinel;
      continue;
    }

    ptr = upb_MtDecoder_DecodeOneofField(d, ptr, ch, &item);
  }

  // Push final oneof.
  upb_MtDecoder_PushOneof(d, item);
  return ptr;
}
378 
// Decodes one modifier value and applies it.
//
// If a field has already been seen (`last_field` non-NULL) the modifier applies
// to that field.  Otherwise it is a message-level modifier and is stored in
// *msg_modifiers; this is rejected when there is no table (extensions).
// Returns the position after the decoded value.
static const char* upb_MtDecoder_ParseModifier(upb_MtDecoder* d,
                                               const char* ptr, char first_ch,
                                               upb_MiniTableField* last_field,
                                               uint64_t* msg_modifiers) {
  uint32_t mod;
  ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, first_ch,
                                         kUpb_EncodedValue_MinModifier,
                                         kUpb_EncodedValue_MaxModifier, &mod);

  if (last_field != NULL) {
    upb_MtDecoder_ModifyField(d, *msg_modifiers, mod, last_field);
    return ptr;
  }

  if (d->table == NULL) {
    upb_MdDecoder_ErrorJmp(&d->base,
                           "Extensions cannot have message modifiers");
  }
  *msg_modifiers = mod;
  return ptr;
}
399 
// Allocates the table's subs array from the arena and initializes it.
//
// Sub-message slots come first and are set to the empty mini table; sub-enum
// slots follow and are set to NULL.  Enum fields were numbered from zero while
// parsing, so their submsg_index is shifted past the sub-message slots here.
static void upb_MtDecoder_AllocateSubs(upb_MtDecoder* d,
                                       upb_SubCounts sub_counts) {
  const uint32_t msg_count = sub_counts.submsg_count;
  const uint32_t total_count = msg_count + sub_counts.subenum_count;

  upb_MiniTableSub* subs = upb_Arena_Malloc(
      d->arena, sizeof(*d->table->UPB_PRIVATE(subs)) * total_count);
  upb_MdDecoder_CheckOutOfMemory(&d->base, subs);

  uint32_t next = 0;
  while (next < msg_count) {
    subs[next].UPB_PRIVATE(submsg) = UPB_PRIVATE(_upb_MiniTable_Empty)();
    next++;
  }

  if (sub_counts.subenum_count) {
    upb_MiniTableField* const fields_end =
        d->fields + d->table->UPB_PRIVATE(field_count);
    for (upb_MiniTableField* f = d->fields; f < fields_end; f++) {
      if (f->UPB_PRIVATE(descriptortype) != kUpb_FieldType_Enum) continue;
      f->UPB_PRIVATE(submsg_index) += sub_counts.submsg_count;
    }
    while (next < total_count) {
      subs[next].UPB_PRIVATE(subenum) = NULL;
      next++;
    }
  }

  d->table->UPB_PRIVATE(subs) = subs;
}
424 
// Main parsing loop over the `len` bytes of mini descriptor at `ptr`.
//
//   fields       storage for the decoded fields, `field_size` bytes apart.
//   field_count  incremented once per decoded field.
//   sub_counts   running counts of sub-message / sub-enum fields.
//
// When d->table is NULL (extension), parsing stops before a second field and
// returns a pointer to it; message modifiers and oneofs are rejected.  With a
// table, dense_below is recorded at the first skip, or at the end if there was
// none.  Returns the final read position.
static const char* upb_MtDecoder_Parse(upb_MtDecoder* d, const char* ptr,
                                       size_t len, void* fields,
                                       size_t field_size, uint16_t* field_count,
                                       upb_SubCounts* sub_counts) {
  uint64_t msg_modifiers = 0;
  uint32_t last_field_number = 0;
  upb_MiniTableField* last_field = NULL;
  bool need_dense_below = d->table != NULL;

  d->base.end = UPB_PTRADD(ptr, len);

  while (ptr < d->base.end) {
    const char ch = *ptr++;

    // A field definition.
    if (ch <= kUpb_EncodedValue_MaxField) {
      if (!d->table && last_field) {
        // For extensions, consume only a single field and then return.
        return --ptr;
      }
      upb_MiniTableField* field = fields;
      *field_count += 1;
      fields = (char*)fields + field_size;
      field->UPB_PRIVATE(number) = ++last_field_number;
      last_field = field;
      upb_MiniTable_SetField(d, ch, field, msg_modifiers, sub_counts);
      continue;
    }

    // A modifier for the last field, or for the message if none was seen yet.
    if (kUpb_EncodedValue_MinModifier <= ch &&
        ch <= kUpb_EncodedValue_MaxModifier) {
      ptr = upb_MtDecoder_ParseModifier(d, ptr, ch, last_field, &msg_modifiers);
      if (msg_modifiers & kUpb_MessageModifier_IsExtendable) {
        d->table->UPB_PRIVATE(ext) |= kUpb_ExtMode_Extendable;
      }
      continue;
    }

    // End of the field list; the oneof section follows.
    if (ch == kUpb_EncodedValue_End) {
      if (!d->table) {
        upb_MdDecoder_ErrorJmp(&d->base, "Extensions cannot have oneofs.");
      }
      ptr = upb_MtDecoder_DecodeOneofs(d, ptr);
      continue;
    }

    // A gap in the field numbers.
    if (kUpb_EncodedValue_MinSkip <= ch && ch <= kUpb_EncodedValue_MaxSkip) {
      if (need_dense_below) {
        d->table->UPB_PRIVATE(dense_below) = d->table->UPB_PRIVATE(field_count);
        need_dense_below = false;
      }
      uint32_t skip;
      ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, ch,
                                             kUpb_EncodedValue_MinSkip,
                                             kUpb_EncodedValue_MaxSkip, &skip);
      // One less than the skip, since the next field seen will increment.
      last_field_number += skip - 1;
      continue;
    }

    upb_MdDecoder_ErrorJmp(&d->base, "Invalid char: %c", ch);
  }

  if (need_dense_below) {
    d->table->UPB_PRIVATE(dense_below) = d->table->UPB_PRIVATE(field_count);
  }

  return ptr;
}
483 
// Parses a message mini descriptor into d->table: allocates the field array,
// runs the parser, trims the array to the number of fields actually decoded,
// and allocates the subs array.
static void upb_MtDecoder_ParseMessage(upb_MtDecoder* d, const char* data,
                                       size_t len) {
  // Buffer length is an upper bound on the number of fields. We will return
  // what we don't use.
  const size_t max_bytes = sizeof(*d->fields) * len;
  d->fields = upb_Arena_Malloc(d->arena, max_bytes);
  upb_MdDecoder_CheckOutOfMemory(&d->base, d->fields);

  upb_MiniTable* table = d->table;
  upb_SubCounts sub_counts = {0, 0};
  table->UPB_PRIVATE(field_count) = 0;
  table->UPB_PRIVATE(fields) = d->fields;
  upb_MtDecoder_Parse(d, data, len, d->fields, sizeof(*d->fields),
                      &table->UPB_PRIVATE(field_count), &sub_counts);

  const size_t used_bytes =
      sizeof(*d->fields) * table->UPB_PRIVATE(field_count);
  upb_Arena_ShrinkLast(d->arena, d->fields, max_bytes, used_bytes);
  table->UPB_PRIVATE(fields) = d->fields;
  upb_MtDecoder_AllocateSubs(d, sub_counts);
}
502 
upb_MtDecoder_CompareFields(const void * _a,const void * _b)503 static int upb_MtDecoder_CompareFields(const void* _a, const void* _b) {
504   const upb_LayoutItem* a = _a;
505   const upb_LayoutItem* b = _b;
506   // Currently we just sort by:
507   //  1. rep (smallest fields first)
508   //  2. type (oneof cases first)
509   //  2. field_index (smallest numbers first)
510   // The main goal of this is to reduce space lost to padding.
511   // Later we may have more subtle reasons to prefer a different ordering.
512   const int rep_bits = upb_Log2Ceiling(kUpb_FieldRep_Max);
513   const int type_bits = upb_Log2Ceiling(kUpb_LayoutItemType_Max);
514   const int idx_bits = (sizeof(a->field_index) * 8);
515   UPB_ASSERT(idx_bits + rep_bits + type_bits < 32);
516 #define UPB_COMBINE(rep, ty, idx) (((rep << type_bits) | ty) << idx_bits) | idx
517   uint32_t a_packed = UPB_COMBINE(a->rep, a->type, a->field_index);
518   uint32_t b_packed = UPB_COMBINE(b->rep, b->type, b->field_index);
519   UPB_ASSERT(a_packed != b_packed);
520 #undef UPB_COMBINE
521   return a_packed < b_packed ? -1 : 1;
522 }
523 
// Adds a layout item for every field that is not a oneof member (oneof items
// were pushed while parsing), then sorts all items into layout order.
// Always returns true.
static bool upb_MtDecoder_SortLayoutItems(upb_MtDecoder* d) {
  const int field_count = d->table->UPB_PRIVATE(field_count);

  for (int idx = 0; idx < field_count; idx++) {
    upb_MiniTableField* f = &d->fields[idx];
    // Oneof members are identified by a presence class >= kOneofBase.
    const bool in_oneof = f->UPB_PRIVATE(offset) >= kOneofBase;
    if (!in_oneof) {
      upb_LayoutItem item = {.field_index = idx,
                             .rep = f->UPB_PRIVATE(mode) >> kUpb_FieldRep_Shift,
                             .type = kUpb_LayoutItemType_Field};
      upb_MtDecoder_PushItem(d, item);
    }
  }

  if (d->vec.size != 0) {
    qsort(d->vec.data, d->vec.size, sizeof(*d->vec.data),
          upb_MtDecoder_CompareFields);
  }

  return true;
}
543 
// Returns n / d rounded up to the next integer.  `d` must be non-zero.
//
// Uses quotient plus remainder test rather than (n + d - 1) / d so that the
// result is correct even when n + d - 1 would wrap around SIZE_MAX.
static size_t upb_MiniTable_DivideRoundUp(size_t n, size_t d) {
  return n / d + (n % d != 0 ? 1 : 0);
}
547 
// Assigns hasbit indexes to all fields that need one and sets the table's
// size to the number of bytes covered by the hasbits.
//
// Required fields are numbered first, starting at kUpb_Reserved_Hasbits; at
// most 64 are allowed.  Fields with no presence get presence 0.  Oneof members
// are left untouched here.
static void upb_MtDecoder_AssignHasbits(upb_MtDecoder* d) {
  upb_MiniTable* ret = d->table;
  const int field_count = ret->UPB_PRIVATE(field_count);
  const size_t first_hasbit = kUpb_Reserved_Hasbits;
  size_t last_hasbit = first_hasbit - 1;

  // First assign required fields, which must have the lowest hasbits.
  for (int idx = 0; idx < field_count; idx++) {
    upb_MiniTableField* field =
        (upb_MiniTableField*)&ret->UPB_PRIVATE(fields)[idx];
    switch (field->UPB_PRIVATE(offset)) {
      case kRequiredPresence:
        field->presence = ++last_hasbit;
        break;
      case kNoPresence:
        field->presence = 0;
        break;
      default:
        break;
    }
  }
  if (last_hasbit > first_hasbit + 63) {
    upb_MdDecoder_ErrorJmp(&d->base, "Too many required fields");
  }

  ret->UPB_PRIVATE(required_count) = last_hasbit - (first_hasbit - 1);

  // Next assign non-required hasbit fields.
  for (int idx = 0; idx < field_count; idx++) {
    upb_MiniTableField* field =
        (upb_MiniTableField*)&ret->UPB_PRIVATE(fields)[idx];
    if (field->UPB_PRIVATE(offset) != kHasbitPresence) continue;
    field->presence = ++last_hasbit;
  }

  if (last_hasbit) {
    ret->UPB_PRIVATE(size) = upb_MiniTable_DivideRoundUp(last_hasbit + 1, 8);
  } else {
    ret->UPB_PRIVATE(size) = 0;
  }
}
581 
// Reserves storage for one item of representation `rep` at the end of the
// message: aligns the current size up, grows the table's size past the item,
// and returns the item's offset.  Aborts if the message would exceed
// UINT16_MAX bytes.
static size_t upb_MtDecoder_Place(upb_MtDecoder* d, upb_FieldRep rep) {
  static const size_t max = UINT16_MAX;

  const size_t item_align = upb_MtDecoder_AlignOfRep(rep, d->platform);
  const size_t item_size = upb_MtDecoder_SizeOfRep(rep, d->platform);
  const size_t offset = UPB_ALIGN_UP(d->table->UPB_PRIVATE(size), item_align);
  const size_t end = offset + item_size;

  if (end > max) {
    upb_MdDecoder_ErrorJmp(
        &d->base, "Message size exceeded maximum size of %zu bytes", max);
  }

  d->table->UPB_PRIVATE(size) = end;
  return offset;
}
595 
// Assigns final storage offsets to every field from the layout items (in
// their current order), then rounds the message size up to a multiple of 8.
//
// Three passes: place every item; write each oneof's case offset into the
// presence of all its members; write data offsets into the fields.
static void upb_MtDecoder_AssignOffsets(upb_MtDecoder* d) {
  upb_LayoutItem* const begin = d->vec.data;
  upb_LayoutItem* const end = UPB_PTRADD(d->vec.data, d->vec.size);

  // Compute offsets.
  for (upb_LayoutItem* it = begin; it < end; it++) {
    it->offset = upb_MtDecoder_Place(d, it->rep);
  }

  // Assign oneof case offsets.  We must do these first, since assigning
  // actual offsets will overwrite the links of the linked list.
  for (upb_LayoutItem* it = begin; it < end; it++) {
    if (it->type != kUpb_LayoutItemType_OneofCase) continue;

    upb_MiniTableField* member = &d->fields[it->field_index];
    for (;;) {
      // Oneof members store the bitwise complement of the case offset.
      member->presence = ~it->offset;
      if (member->UPB_PRIVATE(offset) == kUpb_LayoutItem_IndexSentinel) break;
      UPB_ASSERT(member->UPB_PRIVATE(offset) - kOneofBase <
                 d->table->UPB_PRIVATE(field_count));
      member = &d->fields[member->UPB_PRIVATE(offset) - kOneofBase];
    }
  }

  // Assign offsets.
  for (upb_LayoutItem* it = begin; it < end; it++) {
    upb_MiniTableField* f = &d->fields[it->field_index];

    if (it->type == kUpb_LayoutItemType_Field) {
      f->UPB_PRIVATE(offset) = it->offset;
    } else if (it->type == kUpb_LayoutItemType_OneofField) {
      // Walk the list; every member shares the same data offset.  The link
      // must be read before it is overwritten.
      for (;;) {
        uint16_t link = f->UPB_PRIVATE(offset);
        f->UPB_PRIVATE(offset) = it->offset;
        if (link == kUpb_LayoutItem_IndexSentinel) break;
        f = &d->fields[link - kOneofBase];
      }
    }
    // Oneof case items were fully handled in the previous pass.
  }

  // The fasttable parser (supported on 64-bit only) depends on this being a
  // multiple of 8 in order to satisfy UPB_MALLOC_ALIGN, which is also 8.
  //
  // On 32-bit we could potentially make this smaller, but there is no
  // compelling reason to optimize this right now.
  d->table->UPB_PRIVATE(size) = UPB_ALIGN_UP(d->table->UPB_PRIVATE(size), 8);
}
645 
// Checks that `f` is a valid map-entry key (expected_num == 1) or value
// (any other expected_num, in practice 2).
//
// The field must carry the expected field number, be scalar, and have an
// allowed type: keys may not be float, double, message, group, bytes or enum;
// values may not be group.  Violations abort through upb_MdDecoder_ErrorJmp().
static void upb_MtDecoder_ValidateEntryField(upb_MtDecoder* d,
                                             const upb_MiniTableField* f,
                                             uint32_t expected_num) {
  const char* name = expected_num == 1 ? "key" : "val";
  const uint32_t f_number = upb_MiniTableField_Number(f);
  if (f_number != expected_num) {
    // Both numbers are uint32_t, so they are formatted with PRIu32 like every
    // other field number in this file (previously "%d").
    upb_MdDecoder_ErrorJmp(&d->base,
                           "map %s did not have expected number (%" PRIu32
                           " vs %" PRIu32 ")",
                           name, expected_num, f_number);
  }

  if (!upb_MiniTableField_IsScalar(f)) {
    upb_MdDecoder_ErrorJmp(
        &d->base, "map %s cannot be repeated or map, or be in oneof", name);
  }

  // Bitmask, indexed by field type, of the types that are not allowed.
  uint32_t not_ok_types;
  if (expected_num == 1) {
    not_ok_types = (1 << kUpb_FieldType_Float) | (1 << kUpb_FieldType_Double) |
                   (1 << kUpb_FieldType_Message) | (1 << kUpb_FieldType_Group) |
                   (1 << kUpb_FieldType_Bytes) | (1 << kUpb_FieldType_Enum);
  } else {
    not_ok_types = 1 << kUpb_FieldType_Group;
  }

  if ((1 << upb_MiniTableField_Type(f)) & not_ok_types) {
    upb_MdDecoder_ErrorJmp(&d->base, "map %s cannot have type %d", name,
                           (int)f->UPB_PRIVATE(descriptortype));
  }
}
676 
// Parses a map-entry mini descriptor.
//
// The entry is parsed like a message, then validated: exactly two fields, no
// oneofs, a valid key as field 1 and a valid value as field 2.  The layout is
// fixed to that of upb_MapEntry instead of being computed, and the table is
// flagged as a map entry.
static void upb_MtDecoder_ParseMap(upb_MtDecoder* d, const char* data,
                                   size_t len) {
  upb_MtDecoder_ParseMessage(d, data, len);
  upb_MtDecoder_AssignHasbits(d);

  upb_MiniTable* table = d->table;

  if (UPB_UNLIKELY(table->UPB_PRIVATE(field_count) != 2)) {
    upb_MdDecoder_ErrorJmp(&d->base, "%hu fields in map",
                           table->UPB_PRIVATE(field_count));
    UPB_UNREACHABLE();
  }

  // Any oneof in the descriptor left a OneofCase item in the vector.
  upb_LayoutItem* const items_end = UPB_PTRADD(d->vec.data, d->vec.size);
  for (upb_LayoutItem* it = d->vec.data; it < items_end; it++) {
    if (it->type != kUpb_LayoutItemType_OneofCase) continue;
    upb_MdDecoder_ErrorJmp(&d->base, "Map entry cannot have oneof");
  }

  upb_MtDecoder_ValidateEntryField(d, &table->UPB_PRIVATE(fields)[0], 1);
  upb_MtDecoder_ValidateEntryField(d, &table->UPB_PRIVATE(fields)[1], 2);

  d->fields[0].UPB_PRIVATE(offset) = offsetof(upb_MapEntry, k);
  d->fields[1].UPB_PRIVATE(offset) = offsetof(upb_MapEntry, v);
  table->UPB_PRIVATE(size) = sizeof(upb_MapEntry);

  // Map entries have a special bit set to signal it's a map entry, used in
  // upb_MiniTable_SetSubMessage() below.
  table->UPB_PRIVATE(ext) |= kUpb_ExtMode_IsMapEntry;
}
706 
// Initializes d->table as a MessageSet: no fields, only the reserved header
// bytes, and the MessageSet extension mode.  A MessageSet descriptor carries
// no payload after the version tag, so a non-zero `len` is an error.
// `data` is not read.
static void upb_MtDecoder_ParseMessageSet(upb_MtDecoder* d, const char* data,
                                          size_t len) {
  if (len != 0) {
    upb_MdDecoder_ErrorJmp(&d->base, "Invalid message set encode length: %zu",
                           len);
  }

  upb_MiniTable* table = d->table;
  table->UPB_PRIVATE(ext) = kUpb_ExtMode_IsMessageSet;
  table->UPB_PRIVATE(size) = kUpb_Reserved_Hasbytes;
  table->UPB_PRIVATE(field_count) = 0;
  table->UPB_PRIVATE(required_count) = 0;
  table->UPB_PRIVATE(dense_below) = 0;
  table->UPB_PRIVATE(table_mask) = -1;
}
722 
upb_MtDecoder_DoBuildMiniTableWithBuf(upb_MtDecoder * decoder,const char * data,size_t len,void ** buf,size_t * buf_size)723 static upb_MiniTable* upb_MtDecoder_DoBuildMiniTableWithBuf(
724     upb_MtDecoder* decoder, const char* data, size_t len, void** buf,
725     size_t* buf_size) {
726   upb_MdDecoder_CheckOutOfMemory(&decoder->base, decoder->table);
727 
728   decoder->table->UPB_PRIVATE(size) = kUpb_Reserved_Hasbytes;
729   decoder->table->UPB_PRIVATE(field_count) = 0;
730   decoder->table->UPB_PRIVATE(ext) = kUpb_ExtMode_NonExtendable;
731   decoder->table->UPB_PRIVATE(dense_below) = 0;
732   decoder->table->UPB_PRIVATE(table_mask) = -1;
733   decoder->table->UPB_PRIVATE(required_count) = 0;
734 
735   // Strip off and verify the version tag.
736   if (!len--) goto done;
737   const char vers = *data++;
738 
739   switch (vers) {
740     case kUpb_EncodedVersion_MapV1:
741       upb_MtDecoder_ParseMap(decoder, data, len);
742       break;
743 
744     case kUpb_EncodedVersion_MessageV1:
745       upb_MtDecoder_ParseMessage(decoder, data, len);
746       upb_MtDecoder_AssignHasbits(decoder);
747       upb_MtDecoder_SortLayoutItems(decoder);
748       upb_MtDecoder_AssignOffsets(decoder);
749       break;
750 
751     case kUpb_EncodedVersion_MessageSetV1:
752       upb_MtDecoder_ParseMessageSet(decoder, data, len);
753       break;
754 
755     default:
756       upb_MdDecoder_ErrorJmp(&decoder->base, "Invalid message version: %c",
757                              vers);
758   }
759 
760 done:
761   *buf = decoder->vec.data;
762   *buf_size = decoder->vec.capacity * sizeof(*decoder->vec.data);
763   return decoder->table;
764 }
765 
upb_MtDecoder_BuildMiniTableWithBuf(upb_MtDecoder * const decoder,const char * const data,const size_t len,void ** const buf,size_t * const buf_size)766 static upb_MiniTable* upb_MtDecoder_BuildMiniTableWithBuf(
767     upb_MtDecoder* const decoder, const char* const data, const size_t len,
768     void** const buf, size_t* const buf_size) {
769   if (UPB_SETJMP(decoder->base.err) != 0) {
770     *buf = decoder->vec.data;
771     *buf_size = decoder->vec.capacity * sizeof(*decoder->vec.data);
772     return NULL;
773   }
774 
775   return upb_MtDecoder_DoBuildMiniTableWithBuf(decoder, data, len, buf,
776                                                buf_size);
777 }
778 
upb_MiniTable_BuildWithBuf(const char * data,size_t len,upb_MiniTablePlatform platform,upb_Arena * arena,void ** buf,size_t * buf_size,upb_Status * status)779 upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len,
780                                           upb_MiniTablePlatform platform,
781                                           upb_Arena* arena, void** buf,
782                                           size_t* buf_size,
783                                           upb_Status* status) {
784   upb_MtDecoder decoder = {
785       .base = {.status = status},
786       .platform = platform,
787       .vec =
788           {
789               .data = *buf,
790               .capacity = *buf_size / sizeof(*decoder.vec.data),
791               .size = 0,
792           },
793       .arena = arena,
794       .table = upb_Arena_Malloc(arena, sizeof(*decoder.table)),
795   };
796 
797   return upb_MtDecoder_BuildMiniTableWithBuf(&decoder, data, len, buf,
798                                              buf_size);
799 }
800 
// Decodes an extension mini descriptor into `ext`.
//
// A non-empty descriptor must start with kUpb_EncodedVersion_ExtensionV1;
// any other leading byte is reported through upb_MdDecoder_ErrorJmp(). The
// rest is parsed with upb_MtDecoder_Parse() and must yield exactly one field.
// When `extendee` is a MessageSet, that field must be a non-repeated
// sub-message.
//
// Returns the pointer produced by upb_MtDecoder_Parse() on success, or NULL
// when parsing fails or one of the constraints above is violated.
static const char* upb_MtDecoder_DoBuildMiniTableExtension(
    upb_MtDecoder* decoder, const char* data, size_t len,
    upb_MiniTableExtension* ext, const upb_MiniTable* extendee,
    upb_MiniTableSub sub) {
  // Consume the version tag, if there is one.
  if (len > 0) {
    if (data[0] != kUpb_EncodedVersion_ExtensionV1) {
      upb_MdDecoder_ErrorJmp(&decoder->base, "Invalid ext version: %c", *data);
    }
    data += 1;
    len -= 1;
  }

  upb_SubCounts sub_counts = {0, 0};
  uint16_t count = 0;
  const char* const end = upb_MtDecoder_Parse(decoder, data, len, ext,
                                              sizeof(*ext), &count,
                                              &sub_counts);
  if (end == NULL) return NULL;
  if (count != 1) return NULL;

  // Adjust the decoded field so that it describes an extension.
  upb_MiniTableField* const field = &ext->UPB_PRIVATE(field);
  field->UPB_PRIVATE(offset) = 0;
  field->presence = 0;
  field->UPB_PRIVATE(mode) |= kUpb_LabelFlags_IsExtension;

  // Extensions of a MessageSet must be singular messages.
  const int extends_message_set =
      (extendee->UPB_PRIVATE(ext) & kUpb_ExtMode_IsMessageSet) != 0;
  if (extends_message_set && (!upb_MiniTableField_IsSubMessage(field) ||
                              upb_MiniTableField_IsArray(field))) {
    return NULL;
  }

  ext->UPB_PRIVATE(sub) = sub;
  ext->UPB_PRIVATE(extendee) = extendee;

  return end;
}
839 
// Error-trapping wrapper around upb_MtDecoder_DoBuildMiniTableExtension().
//
// Sets up the jump target in decoder->base.err and runs the real build;
// returns NULL if control comes back through that jump target. Every
// parameter is declared const, so none of them is modified after the jump
// target is set.
static const char* upb_MtDecoder_BuildMiniTableExtension(
    upb_MtDecoder* const decoder, const char* const data, const size_t len,
    upb_MiniTableExtension* const ext, const upb_MiniTable* const extendee,
    const upb_MiniTableSub sub) {
  if (UPB_SETJMP(decoder->base.err) == 0) {
    return upb_MtDecoder_DoBuildMiniTableExtension(decoder, data, len, ext,
                                                   extendee, sub);
  }
  return NULL;
}
848 
// Fills in the caller-provided `ext` from an extension mini descriptor.
//
// The decoder used here has neither an arena nor a message table (both NULL).
// Returns the pointer produced by the extension build, or NULL on failure;
// errors are recorded via `status`.
const char* _upb_MiniTableExtension_Init(const char* data, size_t len,
                                         upb_MiniTableExtension* ext,
                                         const upb_MiniTable* extendee,
                                         upb_MiniTableSub sub,
                                         upb_MiniTablePlatform platform,
                                         upb_Status* status) {
  upb_MtDecoder decoder = {
      .base = {.status = status},
      .platform = platform,
      .table = NULL,
      .arena = NULL,
  };

  const char* const end = upb_MtDecoder_BuildMiniTableExtension(
      &decoder, data, len, ext, extendee, sub);
  return end;
}
865 
_upb_MiniTableExtension_Build(const char * data,size_t len,const upb_MiniTable * extendee,upb_MiniTableSub sub,upb_MiniTablePlatform platform,upb_Arena * arena,upb_Status * status)866 upb_MiniTableExtension* _upb_MiniTableExtension_Build(
867     const char* data, size_t len, const upb_MiniTable* extendee,
868     upb_MiniTableSub sub, upb_MiniTablePlatform platform, upb_Arena* arena,
869     upb_Status* status) {
870   upb_MiniTableExtension* ext =
871       upb_Arena_Malloc(arena, sizeof(upb_MiniTableExtension));
872   if (UPB_UNLIKELY(!ext)) return NULL;
873 
874   const char* ptr = _upb_MiniTableExtension_Init(data, len, ext, extendee, sub,
875                                                  platform, status);
876   if (UPB_UNLIKELY(!ptr)) return NULL;
877 
878   return ext;
879 }
880 
_upb_MiniTable_Build(const char * data,size_t len,upb_MiniTablePlatform platform,upb_Arena * arena,upb_Status * status)881 upb_MiniTable* _upb_MiniTable_Build(const char* data, size_t len,
882                                     upb_MiniTablePlatform platform,
883                                     upb_Arena* arena, upb_Status* status) {
884   void* buf = NULL;
885   size_t size = 0;
886   upb_MiniTable* ret = upb_MiniTable_BuildWithBuf(data, len, platform, arena,
887                                                   &buf, &size, status);
888   free(buf);
889   return ret;
890 }
891