1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2023 Google LLC. All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7
8 #include "upb/mini_descriptor/decode.h"
9
10 #include <inttypes.h>
11 #include <stddef.h>
12 #include <stdint.h>
13 #include <stdlib.h>
14
15 #include "upb/base/descriptor_constants.h"
16 #include "upb/base/internal/log2.h"
17 #include "upb/base/status.h"
18 #include "upb/base/string_view.h"
19 #include "upb/mem/arena.h"
20 #include "upb/message/internal/map_entry.h"
21 #include "upb/message/internal/types.h"
22 #include "upb/mini_descriptor/internal/base92.h"
23 #include "upb/mini_descriptor/internal/decoder.h"
24 #include "upb/mini_descriptor/internal/modifiers.h"
25 #include "upb/mini_descriptor/internal/wire_constants.h"
26 #include "upb/mini_table/extension.h"
27 #include "upb/mini_table/field.h"
28 #include "upb/mini_table/internal/field.h"
29 #include "upb/mini_table/internal/message.h"
30 #include "upb/mini_table/message.h"
31 #include "upb/mini_table/sub.h"
32
33 // Must be last.
34 #include "upb/port/def.inc"
35
// Number of bytes at the start of the hasbit area that are never handed out
// to fields; they make room for the fields of upb_Message itself.
#define kUpb_Reserved_Hasbytes sizeof(struct upb_Message)

// Index of the first hasbit that may be assigned to a field: eight hasbits per
// reserved byte. (The original note gives this as 64, i.e. an 8-byte
// upb_Message -- confirm if that struct changes.)
#define kUpb_Reserved_Hasbits (8 * kUpb_Reserved_Hasbytes)
41
// Kind of storage a layout item stands for. The numeric value is one of the
// keys used when layout items are sorted, so the order here is significant.
typedef enum {
  kUpb_LayoutItemType_OneofCase = 0,   // Oneof case.
  kUpb_LayoutItemType_OneofField = 1,  // Oneof field data.
  kUpb_LayoutItemType_Field = 2,       // Non-oneof field data.

  // Largest enumerator; used to size the sort key.
  kUpb_LayoutItemType_Max = kUpb_LayoutItemType_Field,
} upb_LayoutItemType;
50
// Largest uint16_t value. Marks a oneof with no members yet and terminates a
// oneof's linked list of member fields.
#define kUpb_LayoutItem_IndexSentinel ((uint16_t)UINT16_MAX)
52
53 typedef struct {
54 // Index of the corresponding field. When this is a oneof field, the field's
55 // offset will be the index of the next field in a linked list.
56 uint16_t field_index;
57 uint16_t offset;
58 upb_FieldRep rep;
59 upb_LayoutItemType type;
60 } upb_LayoutItem;
61
62 typedef struct {
63 upb_LayoutItem* data;
64 size_t size;
65 size_t capacity;
66 } upb_LayoutItemVector;
67
68 typedef struct {
69 upb_MdDecoder base;
70 upb_MiniTable* table;
71 upb_MiniTableField* fields;
72 upb_MiniTablePlatform platform;
73 upb_LayoutItemVector vec;
74 upb_Arena* arena;
75 } upb_MtDecoder;
76
// While a message is being decoded, each field's offset temporarily stores a
// presence classifier instead of a real offset.
enum PresenceClass {
  kNoPresence = 0,        // No hasbit (repeated or proto3-singular field).
  kHasbitPresence = 1,    // Gets a regular hasbit.
  kRequiredPresence = 2,  // Required field; gets one of the lowest hasbits.
  kOneofBase = 3,
  // Values >= kOneofBase mean the field belongs to a oneof. In this file the
  // value is then either kUpb_LayoutItem_IndexSentinel (last member) or
  // kOneofBase plus the index of the next field in that oneof's linked list.
};
87
// Returns true when `field` is an array field whose element type can use the
// packed encoding.
static bool upb_MtDecoder_FieldIsPackable(upb_MiniTableField* field) {
  if (!(field->UPB_PRIVATE(mode) & kUpb_FieldMode_Array)) return false;
  return upb_FieldType_IsPackable(field->UPB_PRIVATE(descriptortype));
}
92
// Running totals of fields that need an entry in the table's `subs` array.
typedef struct {
  uint16_t submsg_count;   // Message and group fields seen so far.
  uint16_t subenum_count;  // Closed-enum fields seen so far.
} upb_SubCounts;
97
// Stores the descriptor type of `field` and reserves a sub-table slot for it
// when needed.
//
// - An open (proto3) enum is stored as Int32 and flagged IsAlternate.
// - A string in a message without ValidateUtf8 is stored as Bytes and flagged
//   IsAlternate.
// - Packable array fields become packed when the message defaults to packed.
// - Message/group fields take the next submsg index; enum fields take the next
//   subenum index (rebased later, once the submsg total is known); every other
//   type gets kUpb_NoSub.
static void upb_MiniTable_SetTypeAndSub(upb_MiniTableField* field,
                                        upb_FieldType type,
                                        upb_SubCounts* sub_counts,
                                        uint64_t msg_modifiers,
                                        bool is_proto3_enum) {
  const bool validate_utf8 =
      (msg_modifiers & kUpb_MessageModifier_ValidateUtf8) != 0;
  const bool default_packed =
      (msg_modifiers & kUpb_MessageModifier_DefaultIsPacked) != 0;

  if (is_proto3_enum) {
    UPB_ASSERT(type == kUpb_FieldType_Enum);
    type = kUpb_FieldType_Int32;
    field->UPB_PRIVATE(mode) |= kUpb_LabelFlags_IsAlternate;
  } else if (type == kUpb_FieldType_String && !validate_utf8) {
    type = kUpb_FieldType_Bytes;
    field->UPB_PRIVATE(mode) |= kUpb_LabelFlags_IsAlternate;
  }

  field->UPB_PRIVATE(descriptortype) = type;

  if (upb_MtDecoder_FieldIsPackable(field)) {
    if (default_packed) {
      field->UPB_PRIVATE(mode) |= kUpb_LabelFlags_IsPacked;
    }
  }

  switch (type) {
    case kUpb_FieldType_Message:
    case kUpb_FieldType_Group:
      field->UPB_PRIVATE(submsg_index) = sub_counts->submsg_count++;
      break;
    case kUpb_FieldType_Enum:
      // Provisional: shifted by the submsg total once it is known.
      field->UPB_PRIVATE(submsg_index) = sub_counts->subenum_count++;
      break;
    default:
      field->UPB_PRIVATE(submsg_index) = kUpb_NoSub;
      break;
  }
}
130
131 static const char kUpb_EncodedToType[] = {
132 [kUpb_EncodedType_Double] = kUpb_FieldType_Double,
133 [kUpb_EncodedType_Float] = kUpb_FieldType_Float,
134 [kUpb_EncodedType_Int64] = kUpb_FieldType_Int64,
135 [kUpb_EncodedType_UInt64] = kUpb_FieldType_UInt64,
136 [kUpb_EncodedType_Int32] = kUpb_FieldType_Int32,
137 [kUpb_EncodedType_Fixed64] = kUpb_FieldType_Fixed64,
138 [kUpb_EncodedType_Fixed32] = kUpb_FieldType_Fixed32,
139 [kUpb_EncodedType_Bool] = kUpb_FieldType_Bool,
140 [kUpb_EncodedType_String] = kUpb_FieldType_String,
141 [kUpb_EncodedType_Group] = kUpb_FieldType_Group,
142 [kUpb_EncodedType_Message] = kUpb_FieldType_Message,
143 [kUpb_EncodedType_Bytes] = kUpb_FieldType_Bytes,
144 [kUpb_EncodedType_UInt32] = kUpb_FieldType_UInt32,
145 [kUpb_EncodedType_OpenEnum] = kUpb_FieldType_Enum,
146 [kUpb_EncodedType_SFixed32] = kUpb_FieldType_SFixed32,
147 [kUpb_EncodedType_SFixed64] = kUpb_FieldType_SFixed64,
148 [kUpb_EncodedType_SInt32] = kUpb_FieldType_SInt32,
149 [kUpb_EncodedType_SInt64] = kUpb_FieldType_SInt64,
150 [kUpb_EncodedType_ClosedEnum] = kUpb_FieldType_Enum,
151 };
152
// Initializes `field` from the encoded type character `ch`.
//
// Sets the field mode (scalar or array, plus the storage representation),
// stores a provisional presence class in the field's offset (hasbit for
// singular fields, none for repeated ones) and then records the type via
// upb_MiniTable_SetTypeAndSub(). An unknown type is reported through
// upb_MdDecoder_ErrorJmp().
static void upb_MiniTable_SetField(upb_MtDecoder* d, uint8_t ch,
                                   upb_MiniTableField* field,
                                   uint64_t msg_modifiers,
                                   upb_SubCounts* sub_counts) {
  // Storage representation of each singular, non-message encoded type.
  static const char kUpb_EncodedToFieldRep[] = {
      [kUpb_EncodedType_Double] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_Float] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_Int64] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_UInt64] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_Int32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_Fixed64] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_Fixed32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_Bool] = kUpb_FieldRep_1Byte,
      [kUpb_EncodedType_String] = kUpb_FieldRep_StringView,
      [kUpb_EncodedType_Bytes] = kUpb_FieldRep_StringView,
      [kUpb_EncodedType_UInt32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_OpenEnum] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_SFixed32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_SFixed64] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_SInt32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_SInt64] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_ClosedEnum] = kUpb_FieldRep_4Byte,
  };

  // Arrays and sub-messages are stored as pointers.
  const char ptr_rep = (d->platform == kUpb_MiniTablePlatform_32Bit)
                           ? kUpb_FieldRep_4Byte
                           : kUpb_FieldRep_8Byte;

  int8_t type = _upb_FromBase92(ch);
  const bool repeated = ch >= _upb_ToBase92(kUpb_EncodedType_RepeatedBase);

  if (!repeated) {
    field->UPB_PRIVATE(mode) = kUpb_FieldMode_Scalar;
    field->UPB_PRIVATE(offset) = kHasbitPresence;
    const bool is_submsg =
        type == kUpb_EncodedType_Group || type == kUpb_EncodedType_Message;
    if (is_submsg) {
      field->UPB_PRIVATE(mode) |= ptr_rep << kUpb_FieldRep_Shift;
    } else if ((unsigned long)type >= sizeof(kUpb_EncodedToFieldRep)) {
      upb_MdDecoder_ErrorJmp(&d->base, "Invalid field type: %d", (int)type);
    } else {
      field->UPB_PRIVATE(mode) |= kUpb_EncodedToFieldRep[type]
                                  << kUpb_FieldRep_Shift;
    }
  } else {
    // Repeated types are the singular types shifted up by RepeatedBase.
    type -= kUpb_EncodedType_RepeatedBase;
    field->UPB_PRIVATE(mode) =
        kUpb_FieldMode_Array | (ptr_rep << kUpb_FieldRep_Shift);
    field->UPB_PRIVATE(offset) = kNoPresence;
  }

  // A negative `type` becomes a huge unsigned value and is rejected here too.
  if ((unsigned long)type >= sizeof(kUpb_EncodedToType)) {
    upb_MdDecoder_ErrorJmp(&d->base, "Invalid field type: %d", (int)type);
  }

  const bool is_open_enum = type == kUpb_EncodedType_OpenEnum;
  upb_MiniTable_SetTypeAndSub(field, kUpb_EncodedToType[type], sub_counts,
                              msg_modifiers, is_open_enum);
}
205
// Applies the encoded per-field modifier bits in `field_modifiers` to `field`.
//
// Handled bits: FlipPacked (toggles IsPacked on a packable field),
// FlipValidateUtf8 (turns an "alternate" Bytes field back into String),
// IsProto3Singular (no presence) and IsRequired (required presence). Invalid
// combinations are reported through upb_MdDecoder_ErrorJmp().
// `message_modifiers` is currently unused.
static void upb_MtDecoder_ModifyField(upb_MtDecoder* d,
                                      uint32_t message_modifiers,
                                      uint32_t field_modifiers,
                                      upb_MiniTableField* field) {
  const bool flip_packed =
      (field_modifiers & kUpb_EncodedFieldModifier_FlipPacked) != 0;
  const bool flip_utf8 =
      (field_modifiers & kUpb_EncodedFieldModifier_FlipValidateUtf8) != 0;
  const bool singular =
      (field_modifiers & kUpb_EncodedFieldModifier_IsProto3Singular) != 0;
  const bool required =
      (field_modifiers & kUpb_EncodedFieldModifier_IsRequired) != 0;

  if (flip_packed) {
    if (!upb_MtDecoder_FieldIsPackable(field)) {
      upb_MdDecoder_ErrorJmp(&d->base,
                             "Cannot flip packed on unpackable field %" PRIu32,
                             upb_MiniTableField_Number(field));
    }
    field->UPB_PRIVATE(mode) ^= kUpb_LabelFlags_IsPacked;
  }

  if (flip_utf8) {
    // Only a string that was downgraded to "alternate" bytes can be flipped.
    const bool is_alternate_bytes =
        field->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Bytes &&
        (field->UPB_PRIVATE(mode) & kUpb_LabelFlags_IsAlternate);
    if (!is_alternate_bytes) {
      upb_MdDecoder_ErrorJmp(&d->base,
                             "Cannot flip ValidateUtf8 on field %" PRIu32
                             ", type=%d, mode=%d",
                             upb_MiniTableField_Number(field),
                             (int)field->UPB_PRIVATE(descriptortype),
                             (int)field->UPB_PRIVATE(mode));
    }
    field->UPB_PRIVATE(descriptortype) = kUpb_FieldType_String;
    field->UPB_PRIVATE(mode) &= ~kUpb_LabelFlags_IsAlternate;
  }

  // Presence modifiers only make sense on a field that still has the default
  // hasbit presence class.
  if (singular || required) {
    if (field->UPB_PRIVATE(offset) != kHasbitPresence) {
      upb_MdDecoder_ErrorJmp(&d->base,
                             "Invalid modifier(s) for repeated field %" PRIu32,
                             upb_MiniTableField_Number(field));
    }
  }
  if (singular && required) {
    upb_MdDecoder_ErrorJmp(
        &d->base, "Field %" PRIu32 " cannot be both singular and required",
        upb_MiniTableField_Number(field));
  }

  if (required) {
    field->UPB_PRIVATE(offset) = kRequiredPresence;
  } else if (singular) {
    field->UPB_PRIVATE(offset) = kNoPresence;
  }
}
253
// Appends `item` to the decoder's layout-item vector, growing the heap buffer
// when it is full (doubling, with a minimum capacity of 8).
//
// The result of realloc() is checked before it replaces d->vec.data. If the
// allocation fails, the vector still refers to the old, valid buffer, so the
// error path (which hands d->vec.data back to the caller) does not leak it.
static void upb_MtDecoder_PushItem(upb_MtDecoder* d, upb_LayoutItem item) {
  if (d->vec.size == d->vec.capacity) {
    size_t new_cap = UPB_MAX(8, d->vec.size * 2);
    void* new_data = realloc(d->vec.data, new_cap * sizeof(*d->vec.data));
    // Reports out-of-memory for a NULL pointer; d->vec is untouched so far.
    upb_MdDecoder_CheckOutOfMemory(&d->base, new_data);
    d->vec.data = new_data;
    d->vec.capacity = new_cap;
  }
  d->vec.data[d->vec.size++] = item;
}
263
// Emits the two layout items of a finished oneof: one for its data (sized by
// the largest member seen) and one 4-byte item for its case.
//
// `item.field_index` holds the biased index of the list head; an untouched
// sentinel means the oneof has no members, which is an error.
static void upb_MtDecoder_PushOneof(upb_MtDecoder* d, upb_LayoutItem item) {
  if (item.field_index == kUpb_LayoutItem_IndexSentinel) {
    upb_MdDecoder_ErrorJmp(&d->base, "Empty oneof");
  }

  // Remove the kOneofBase bias to recover the real field index.
  upb_LayoutItem data_item = item;
  data_item.field_index = (uint16_t)(item.field_index - kOneofBase);
  data_item.type = kUpb_LayoutItemType_OneofField;

  upb_LayoutItem case_item = data_item;
  case_item.rep = kUpb_FieldRep_4Byte;  // Field Number.
  case_item.type = kUpb_LayoutItemType_OneofCase;

  upb_MtDecoder_PushItem(d, data_item);
  upb_MtDecoder_PushItem(d, case_item);
}
279
// Returns the size in bytes of storage representation `rep` on `platform`.
// Only the string-view representation differs between 32- and 64-bit targets.
static size_t upb_MtDecoder_SizeOfRep(upb_FieldRep rep,
                                      upb_MiniTablePlatform platform) {
  static const uint8_t kRepToSize32[] = {
      [kUpb_FieldRep_1Byte] = 1,
      [kUpb_FieldRep_4Byte] = 4,
      [kUpb_FieldRep_StringView] = 8,
      [kUpb_FieldRep_8Byte] = 8,
  };
  static const uint8_t kRepToSize64[] = {
      [kUpb_FieldRep_1Byte] = 1,
      [kUpb_FieldRep_4Byte] = 4,
      [kUpb_FieldRep_StringView] = 16,
      [kUpb_FieldRep_8Byte] = 8,
  };
  // The table for the host platform must agree with the real struct size.
  UPB_ASSERT(sizeof(upb_StringView) ==
             UPB_SIZE(kRepToSize32, kRepToSize64)[kUpb_FieldRep_StringView]);

  const uint8_t* sizes =
      (platform == kUpb_MiniTablePlatform_32Bit) ? kRepToSize32 : kRepToSize64;
  return sizes[rep];
}
299
// Returns the required alignment in bytes of storage representation `rep` on
// `platform`. Only the string-view representation differs between targets.
static size_t upb_MtDecoder_AlignOfRep(upb_FieldRep rep,
                                       upb_MiniTablePlatform platform) {
  static const uint8_t kRepToAlign32[] = {
      [kUpb_FieldRep_1Byte] = 1,
      [kUpb_FieldRep_4Byte] = 4,
      [kUpb_FieldRep_StringView] = 4,
      [kUpb_FieldRep_8Byte] = 8,
  };
  static const uint8_t kRepToAlign64[] = {
      [kUpb_FieldRep_1Byte] = 1,
      [kUpb_FieldRep_4Byte] = 4,
      [kUpb_FieldRep_StringView] = 8,
      [kUpb_FieldRep_8Byte] = 8,
  };
  // The table for the host platform must agree with the real alignment.
  UPB_ASSERT(UPB_ALIGN_OF(upb_StringView) ==
             UPB_SIZE(kRepToAlign32, kRepToAlign64)[kUpb_FieldRep_StringView]);

  const uint8_t* aligns = (platform == kUpb_MiniTablePlatform_32Bit)
                              ? kRepToAlign32
                              : kRepToAlign64;
  return aligns[rep];
}
319
// Decodes one oneof member (a base92 varint field number starting with
// `first_ch`) and links that field into the oneof described by `item`.
//
// The field must exist and must still have plain hasbit presence (so it is
// not repeated, required, proto3-singular, or already in a oneof). `item->rep`
// grows to the largest member representation. Returns the advanced `ptr`.
static const char* upb_MtDecoder_DecodeOneofField(upb_MtDecoder* d,
                                                  const char* ptr,
                                                  char first_ch,
                                                  upb_LayoutItem* item) {
  uint32_t number;
  ptr = upb_MdDecoder_DecodeBase92Varint(
      &d->base, ptr, first_ch, kUpb_EncodedValue_MinOneofField,
      kUpb_EncodedValue_MaxOneofField, &number);

  // The lookup returns a const pointer; the table is still under construction
  // here, so writing through it is intended.
  upb_MiniTableField* member =
      (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(d->table, number);

  if (member == NULL) {
    upb_MdDecoder_ErrorJmp(&d->base,
                           "Couldn't add field number %" PRIu32
                           " to oneof, no such field number.",
                           number);
  }
  if (member->UPB_PRIVATE(offset) != kHasbitPresence) {
    upb_MdDecoder_ErrorJmp(
        &d->base,
        "Cannot add repeated, required, or singular field %" PRIu32
        " to oneof.",
        number);
  }

  // Oneof storage must be large enough to accommodate the largest member.
  const int member_rep = member->UPB_PRIVATE(mode) >> kUpb_FieldRep_Shift;
  const size_t member_size = upb_MtDecoder_SizeOfRep(member_rep, d->platform);
  const size_t current_size = upb_MtDecoder_SizeOfRep(item->rep, d->platform);
  if (member_size > current_size) {
    item->rep = member_rep;
  }

  // Prepend this field to the linked list: it points at the old head, and the
  // item now points at it (biased by kOneofBase).
  member->UPB_PRIVATE(offset) = item->field_index;
  item->field_index = (member - d->fields) + kOneofBase;
  return ptr;
}
356
// Decodes the oneof section that runs from `ptr` to the end of the input and
// pushes a data item and a case item for every oneof found. Returns the
// position after the consumed input.
//
// NOTE(review): `item.rep` is not reset when moving to the next oneof, so a
// later oneof's storage is at least as large as any earlier one's -- confirm
// this is intended before changing it, since it affects message layout.
static const char* upb_MtDecoder_DecodeOneofs(upb_MtDecoder* d,
                                              const char* ptr) {
  upb_LayoutItem item = {.rep = 0,
                         .field_index = kUpb_LayoutItem_IndexSentinel};

  while (ptr < d->base.end) {
    const char ch = *ptr++;

    if (ch == kUpb_EncodedValue_FieldSeparator) {
      // Separates two members of the same oneof; nothing to do.
      continue;
    }

    if (ch == kUpb_EncodedValue_OneofSeparator) {
      // Current oneof is complete; start collecting the next one.
      upb_MtDecoder_PushOneof(d, item);
      item.field_index = kUpb_LayoutItem_IndexSentinel;
      continue;
    }

    ptr = upb_MtDecoder_DecodeOneofField(d, ptr, ch, &item);
  }

  // The last oneof has no trailing separator.
  upb_MtDecoder_PushOneof(d, item);
  return ptr;
}
378
// Decodes a modifier varint starting with `first_ch`.
//
// When a field has already been seen the modifier applies to `last_field`;
// otherwise it is a message-level modifier and is stored in `*msg_modifiers`.
// Message modifiers are rejected when decoding an extension (no table).
// Returns the advanced `ptr`.
static const char* upb_MtDecoder_ParseModifier(upb_MtDecoder* d,
                                               const char* ptr, char first_ch,
                                               upb_MiniTableField* last_field,
                                               uint64_t* msg_modifiers) {
  uint32_t bits;
  ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, first_ch,
                                         kUpb_EncodedValue_MinModifier,
                                         kUpb_EncodedValue_MaxModifier, &bits);

  if (last_field != NULL) {
    upb_MtDecoder_ModifyField(d, *msg_modifiers, bits, last_field);
    return ptr;
  }

  if (d->table == NULL) {
    upb_MdDecoder_ErrorJmp(&d->base,
                           "Extensions cannot have message modifiers");
  }
  *msg_modifiers = bits;
  return ptr;
}
399
// Allocates the table's `subs` array from the arena: sub-message slots first
// (each pointing at the empty mini-table), then sub-enum slots (NULL).
//
// Enum fields were numbered from zero during parsing, so their submsg_index
// is shifted by the number of sub-messages to land in the enum part.
static void upb_MtDecoder_AllocateSubs(upb_MtDecoder* d,
                                       upb_SubCounts sub_counts) {
  const uint32_t msg_n = sub_counts.submsg_count;
  const uint32_t enum_n = sub_counts.subenum_count;
  const uint32_t total = msg_n + enum_n;

  upb_MiniTableSub* subs = upb_Arena_Malloc(
      d->arena, sizeof(*d->table->UPB_PRIVATE(subs)) * total);
  upb_MdDecoder_CheckOutOfMemory(&d->base, subs);

  for (uint32_t i = 0; i < msg_n; i++) {
    subs[i].UPB_PRIVATE(submsg) = UPB_PRIVATE(_upb_MiniTable_Empty)();
  }

  if (enum_n != 0) {
    const size_t field_n = d->table->UPB_PRIVATE(field_count);
    for (size_t k = 0; k < field_n; k++) {
      upb_MiniTableField* f = &d->fields[k];
      if (f->UPB_PRIVATE(descriptortype) != kUpb_FieldType_Enum) continue;
      f->UPB_PRIVATE(submsg_index) += sub_counts.submsg_count;
    }
    for (uint32_t i = msg_n; i < total; i++) {
      subs[i].UPB_PRIVATE(subenum) = NULL;
    }
  }

  d->table->UPB_PRIVATE(subs) = subs;
}
424
// Core mini-descriptor parser, shared by messages and extensions.
//
// Reads `len` bytes starting at `ptr` and fills consecutive records of
// `field_size` bytes in `fields`, incrementing `*field_count` per field and
// updating `sub_counts` for fields that need a sub-table entry.
//
// Each input character is one of:
//   - a field type: starts a new field numbered last_field_number + 1;
//   - a modifier: applies to the last field, or to the message when no field
//     has been seen yet;
//   - the end marker: the oneof section follows (messages only);
//   - a skip: advances the field number and, for messages, fixes dense_below.
//
// For extensions (d->table == NULL) only one field is consumed; parsing stops
// at the next field character and a pointer to it is returned. Otherwise the
// return value is the position after the consumed input. Errors are reported
// through upb_MdDecoder_ErrorJmp().
static const char* upb_MtDecoder_Parse(upb_MtDecoder* d, const char* ptr,
                                       size_t len, void* fields,
                                       size_t field_size, uint16_t* field_count,
                                       upb_SubCounts* sub_counts) {
  uint64_t msg_modifiers = 0;
  uint32_t last_field_number = 0;
  upb_MiniTableField* last_field = NULL;
  bool need_dense_below = d->table != NULL;

  d->base.end = UPB_PTRADD(ptr, len);

  while (ptr < d->base.end) {
    char ch = *ptr++;
    if (ch <= kUpb_EncodedValue_MaxField) {
      if (!d->table && last_field) {
        // For extensions, consume only a single field and then return.
        return --ptr;
      }
      // The count is a uint16_t; refuse to wrap it around, which would make
      // the table silently describe fewer fields than were decoded.
      if (*field_count == UINT16_MAX) {
        upb_MdDecoder_ErrorJmp(&d->base, "Too many fields");
      }
      upb_MiniTableField* field = fields;
      *field_count += 1;
      fields = (char*)fields + field_size;
      field->UPB_PRIVATE(number) = ++last_field_number;
      last_field = field;
      upb_MiniTable_SetField(d, ch, field, msg_modifiers, sub_counts);
    } else if (kUpb_EncodedValue_MinModifier <= ch &&
               ch <= kUpb_EncodedValue_MaxModifier) {
      ptr = upb_MtDecoder_ParseModifier(d, ptr, ch, last_field, &msg_modifiers);
      // msg_modifiers can only become non-zero when d->table is set, because
      // ParseModifier rejects message modifiers for extensions.
      if (msg_modifiers & kUpb_MessageModifier_IsExtendable) {
        d->table->UPB_PRIVATE(ext) |= kUpb_ExtMode_Extendable;
      }
    } else if (ch == kUpb_EncodedValue_End) {
      if (!d->table) {
        upb_MdDecoder_ErrorJmp(&d->base, "Extensions cannot have oneofs.");
      }
      ptr = upb_MtDecoder_DecodeOneofs(d, ptr);
    } else if (kUpb_EncodedValue_MinSkip <= ch &&
               ch <= kUpb_EncodedValue_MaxSkip) {
      // The first gap in field numbers ends the dense prefix of the table.
      if (need_dense_below) {
        d->table->UPB_PRIVATE(dense_below) = d->table->UPB_PRIVATE(field_count);
        need_dense_below = false;
      }
      uint32_t skip;
      ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, ch,
                                             kUpb_EncodedValue_MinSkip,
                                             kUpb_EncodedValue_MaxSkip, &skip);
      last_field_number += skip;
      last_field_number--;  // Next field seen will increment.
    } else {
      upb_MdDecoder_ErrorJmp(&d->base, "Invalid char: %c", ch);
    }
  }

  // No skip was seen: every field is in the dense prefix.
  if (need_dense_below) {
    d->table->UPB_PRIVATE(dense_below) = d->table->UPB_PRIVATE(field_count);
  }

  return ptr;
}
483
// Parses a message mini-descriptor body into d->table: allocates the field
// array, runs the shared parser, returns the unused tail of the field array
// to the arena and allocates the `subs` array.
static void upb_MtDecoder_ParseMessage(upb_MtDecoder* d, const char* data,
                                       size_t len) {
  // Buffer length is an upper bound on the number of fields. We will return
  // what we don't use.
  //
  // Reject lengths whose byte size does not fit in size_t; otherwise the
  // multiplication below would wrap and under-allocate the array.
  if (len > SIZE_MAX / sizeof(*d->fields)) {
    upb_MdDecoder_ErrorJmp(&d->base, "Out of memory");
  }
  d->fields = upb_Arena_Malloc(d->arena, sizeof(*d->fields) * len);
  upb_MdDecoder_CheckOutOfMemory(&d->base, d->fields);

  upb_SubCounts sub_counts = {0, 0};
  d->table->UPB_PRIVATE(field_count) = 0;
  // Must be set before parsing: oneof decoding looks fields up in the table.
  d->table->UPB_PRIVATE(fields) = d->fields;
  upb_MtDecoder_Parse(d, data, len, d->fields, sizeof(*d->fields),
                      &d->table->UPB_PRIVATE(field_count), &sub_counts);

  upb_Arena_ShrinkLast(d->arena, d->fields, sizeof(*d->fields) * len,
                       sizeof(*d->fields) * d->table->UPB_PRIVATE(field_count));
  d->table->UPB_PRIVATE(fields) = d->fields;
  upb_MtDecoder_AllocateSubs(d, sub_counts);
}
502
upb_MtDecoder_CompareFields(const void * _a,const void * _b)503 static int upb_MtDecoder_CompareFields(const void* _a, const void* _b) {
504 const upb_LayoutItem* a = _a;
505 const upb_LayoutItem* b = _b;
506 // Currently we just sort by:
507 // 1. rep (smallest fields first)
508 // 2. type (oneof cases first)
509 // 2. field_index (smallest numbers first)
510 // The main goal of this is to reduce space lost to padding.
511 // Later we may have more subtle reasons to prefer a different ordering.
512 const int rep_bits = upb_Log2Ceiling(kUpb_FieldRep_Max);
513 const int type_bits = upb_Log2Ceiling(kUpb_LayoutItemType_Max);
514 const int idx_bits = (sizeof(a->field_index) * 8);
515 UPB_ASSERT(idx_bits + rep_bits + type_bits < 32);
516 #define UPB_COMBINE(rep, ty, idx) (((rep << type_bits) | ty) << idx_bits) | idx
517 uint32_t a_packed = UPB_COMBINE(a->rep, a->type, a->field_index);
518 uint32_t b_packed = UPB_COMBINE(b->rep, b->type, b->field_index);
519 UPB_ASSERT(a_packed != b_packed);
520 #undef UPB_COMBINE
521 return a_packed < b_packed ? -1 : 1;
522 }
523
// Adds a layout item for every field that is not a oneof member (oneof items
// were pushed while parsing) and sorts all items into layout order.
// Always returns true.
static bool upb_MtDecoder_SortLayoutItems(upb_MtDecoder* d) {
  const int field_n = d->table->UPB_PRIVATE(field_count);

  for (int idx = 0; idx < field_n; idx++) {
    const upb_MiniTableField* f = &d->fields[idx];
    const bool in_oneof = f->UPB_PRIVATE(offset) >= kOneofBase;
    if (!in_oneof) {
      upb_LayoutItem item = {
          .field_index = idx,
          .rep = f->UPB_PRIVATE(mode) >> kUpb_FieldRep_Shift,
          .type = kUpb_LayoutItemType_Field};
      upb_MtDecoder_PushItem(d, item);
    }
  }

  // Skip qsort() for an empty vector, whose data pointer may be NULL.
  if (d->vec.size != 0) {
    qsort(d->vec.data, d->vec.size, sizeof(*d->vec.data),
          upb_MtDecoder_CompareFields);
  }

  return true;
}
543
// Returns n / d rounded up to the next integer. `d` must be non-zero.
//
// Written as quotient plus remainder test rather than (n + d - 1) / d so that
// the result is still correct when n is close to SIZE_MAX.
static size_t upb_MiniTable_DivideRoundUp(size_t n, size_t d) {
  return n / d + (n % d != 0 ? 1 : 0);
}
547
// Assigns a hasbit index to every field that needs one and sets the table
// size to cover the hasbit bytes.
//
// Required fields get the lowest hasbits (at most 64 of them), followed by
// the other hasbit fields. Fields with no presence get presence 0. Oneof
// members are left untouched here. Hasbit numbering starts at
// kUpb_Reserved_Hasbits.
static void upb_MtDecoder_AssignHasbits(upb_MtDecoder* d) {
  upb_MiniTable* table = d->table;
  upb_MiniTableField* fields =
      (upb_MiniTableField*)table->UPB_PRIVATE(fields);
  const int field_n = table->UPB_PRIVATE(field_count);
  size_t last_hasbit = kUpb_Reserved_Hasbits - 1;

  // Pass 1: required fields, which must have the lowest hasbits.
  for (int idx = 0; idx < field_n; idx++) {
    upb_MiniTableField* f = &fields[idx];
    switch (f->UPB_PRIVATE(offset)) {
      case kRequiredPresence:
        f->presence = ++last_hasbit;
        break;
      case kNoPresence:
        f->presence = 0;
        break;
      default:
        break;
    }
  }
  if (last_hasbit > kUpb_Reserved_Hasbits + 63) {
    upb_MdDecoder_ErrorJmp(&d->base, "Too many required fields");
  }

  table->UPB_PRIVATE(required_count) =
      last_hasbit - (kUpb_Reserved_Hasbits - 1);

  // Pass 2: the remaining (non-required) hasbit fields.
  for (int idx = 0; idx < field_n; idx++) {
    upb_MiniTableField* f = &fields[idx];
    if (f->UPB_PRIVATE(offset) != kHasbitPresence) continue;
    f->presence = ++last_hasbit;
  }

  // Bytes needed to hold hasbits 0..last_hasbit.
  if (last_hasbit) {
    table->UPB_PRIVATE(size) = upb_MiniTable_DivideRoundUp(last_hasbit + 1, 8);
  } else {
    table->UPB_PRIVATE(size) = 0;
  }
}
581
// Reserves storage for one value of representation `rep` at the end of the
// message: aligns the current size, grows the table size past the new value
// and returns the value's offset. Fails if the message would exceed
// UINT16_MAX bytes.
static size_t upb_MtDecoder_Place(upb_MtDecoder* d, upb_FieldRep rep) {
  static const size_t max = UINT16_MAX;

  const size_t align = upb_MtDecoder_AlignOfRep(rep, d->platform);
  const size_t offset = UPB_ALIGN_UP(d->table->UPB_PRIVATE(size), align);
  const size_t end = offset + upb_MtDecoder_SizeOfRep(rep, d->platform);

  if (end > max) {
    upb_MdDecoder_ErrorJmp(
        &d->base, "Message size exceeded maximum size of %zu bytes", max);
  }

  d->table->UPB_PRIVATE(size) = end;
  return offset;
}
595
// Turns the sorted layout items into final field offsets.
//
// Three passes over the items, whose order matters:
//   1. place every item, recording its offset in the item;
//   2. for each oneof case item, store the bitwise complement of the case
//      offset in `presence` of every member field, by walking the linked list
//      kept in the members' offsets;
//   3. overwrite the offsets of all fields with their real data offsets (this
//      destroys the linked lists, hence pass 2 comes first).
// Finally the message size is rounded up to a multiple of 8.
static void upb_MtDecoder_AssignOffsets(upb_MtDecoder* d) {
  upb_LayoutItem* const items = d->vec.data;
  const size_t item_n = d->vec.size;

  // Pass 1: compute offsets.
  for (size_t i = 0; i < item_n; i++) {
    items[i].offset = upb_MtDecoder_Place(d, items[i].rep);
  }

  // Pass 2: oneof case offsets, while the member links are still intact.
  for (size_t i = 0; i < item_n; i++) {
    const upb_LayoutItem* item = &items[i];
    if (item->type != kUpb_LayoutItemType_OneofCase) continue;

    upb_MiniTableField* f = &d->fields[item->field_index];
    for (;;) {
      f->presence = ~item->offset;
      if (f->UPB_PRIVATE(offset) == kUpb_LayoutItem_IndexSentinel) break;
      UPB_ASSERT(f->UPB_PRIVATE(offset) - kOneofBase <
                 d->table->UPB_PRIVATE(field_count));
      f = &d->fields[f->UPB_PRIVATE(offset) - kOneofBase];
    }
  }

  // Pass 3: data offsets.
  for (size_t i = 0; i < item_n; i++) {
    const upb_LayoutItem* item = &items[i];
    upb_MiniTableField* f = &d->fields[item->field_index];

    if (item->type == kUpb_LayoutItemType_OneofField) {
      // Every member of the oneof shares the same data offset.
      for (;;) {
        uint16_t next_offset = f->UPB_PRIVATE(offset);
        f->UPB_PRIVATE(offset) = item->offset;
        if (next_offset == kUpb_LayoutItem_IndexSentinel) break;
        f = &d->fields[next_offset - kOneofBase];
      }
    } else if (item->type == kUpb_LayoutItemType_Field) {
      f->UPB_PRIVATE(offset) = item->offset;
    }
  }

  // The fasttable parser (supported on 64-bit only) depends on this being a
  // multiple of 8 in order to satisfy UPB_MALLOC_ALIGN, which is also 8.
  //
  // On 32-bit we could potentially make this smaller, but there is no
  // compelling reason to optimize this right now.
  d->table->UPB_PRIVATE(size) = UPB_ALIGN_UP(d->table->UPB_PRIVATE(size), 8);
}
645
// Checks that `f` is a valid map-entry key (expected_num == 1) or value
// (any other expected_num; callers pass 2).
//
// The field must carry the expected number, must be scalar, and must not have
// a type that is forbidden for its role: keys cannot be float, double,
// message, group, bytes or enum; values cannot be groups. Violations are
// reported through upb_MdDecoder_ErrorJmp().
static void upb_MtDecoder_ValidateEntryField(upb_MtDecoder* d,
                                             const upb_MiniTableField* f,
                                             uint32_t expected_num) {
  const char* name = expected_num == 1 ? "key" : "val";
  const uint32_t f_number = upb_MiniTableField_Number(f);
  if (f_number != expected_num) {
    // Both numbers are uint32_t, so they are printed with PRIu32 rather than
    // %d to keep the conversion specifiers matched to the argument types.
    upb_MdDecoder_ErrorJmp(&d->base,
                           "map %s did not have expected number (%" PRIu32
                           " vs %" PRIu32 ")",
                           name, expected_num, f_number);
  }

  if (!upb_MiniTableField_IsScalar(f)) {
    upb_MdDecoder_ErrorJmp(
        &d->base, "map %s cannot be repeated or map, or be in oneof", name);
  }

  // Bit i set means field type i is not allowed in this position.
  uint32_t not_ok_types;
  if (expected_num == 1) {
    not_ok_types = (1 << kUpb_FieldType_Float) | (1 << kUpb_FieldType_Double) |
                   (1 << kUpb_FieldType_Message) | (1 << kUpb_FieldType_Group) |
                   (1 << kUpb_FieldType_Bytes) | (1 << kUpb_FieldType_Enum);
  } else {
    not_ok_types = 1 << kUpb_FieldType_Group;
  }

  if ((1 << upb_MiniTableField_Type(f)) & not_ok_types) {
    upb_MdDecoder_ErrorJmp(&d->base, "map %s cannot have type %d", name,
                           (int)f->UPB_PRIVATE(descriptortype));
  }
}
676
// Parses a map-entry mini-descriptor.
//
// The body is parsed like a message, then checked to be a valid map entry:
// exactly two fields, no oneofs, field 1 a valid key and field 2 a valid
// value. The layout is not computed; both fields are placed at the fixed
// offsets of upb_MapEntry and the table is flagged as a map entry.
static void upb_MtDecoder_ParseMap(upb_MtDecoder* d, const char* data,
                                   size_t len) {
  upb_MtDecoder_ParseMessage(d, data, len);
  upb_MtDecoder_AssignHasbits(d);

  if (UPB_UNLIKELY(d->table->UPB_PRIVATE(field_count) != 2)) {
    upb_MdDecoder_ErrorJmp(&d->base, "%hu fields in map",
                           d->table->UPB_PRIVATE(field_count));
    UPB_UNREACHABLE();
  }

  // Any oneof in the descriptor left a case item behind.
  for (size_t i = 0; i < d->vec.size; i++) {
    if (d->vec.data[i].type == kUpb_LayoutItemType_OneofCase) {
      upb_MdDecoder_ErrorJmp(&d->base, "Map entry cannot have oneof");
    }
  }

  const upb_MiniTableField* key_field = &d->table->UPB_PRIVATE(fields)[0];
  const upb_MiniTableField* val_field = &d->table->UPB_PRIVATE(fields)[1];
  upb_MtDecoder_ValidateEntryField(d, key_field, 1);
  upb_MtDecoder_ValidateEntryField(d, val_field, 2);

  d->fields[0].UPB_PRIVATE(offset) = offsetof(upb_MapEntry, k);
  d->fields[1].UPB_PRIVATE(offset) = offsetof(upb_MapEntry, v);
  d->table->UPB_PRIVATE(size) = sizeof(upb_MapEntry);

  // Map entries have a special bit set to signal it's a map entry; the
  // original note says it is used by upb_MiniTable_SetSubMessage().
  d->table->UPB_PRIVATE(ext) |= kUpb_ExtMode_IsMapEntry;
}
706
// Builds the table for a MessageSet. A MessageSet descriptor has no body, so
// any remaining input is an error; the table gets no fields, only the
// reserved bytes, and is flagged as a MessageSet.
static void upb_MtDecoder_ParseMessageSet(upb_MtDecoder* d, const char* data,
                                          size_t len) {
  if (len != 0) {
    upb_MdDecoder_ErrorJmp(&d->base, "Invalid message set encode length: %zu",
                           len);
  }

  upb_MiniTable* table = d->table;
  table->UPB_PRIVATE(ext) = kUpb_ExtMode_IsMessageSet;
  table->UPB_PRIVATE(field_count) = 0;
  table->UPB_PRIVATE(required_count) = 0;
  table->UPB_PRIVATE(dense_below) = 0;
  table->UPB_PRIVATE(table_mask) = -1;
  table->UPB_PRIVATE(size) = kUpb_Reserved_Hasbytes;
}
722
upb_MtDecoder_DoBuildMiniTableWithBuf(upb_MtDecoder * decoder,const char * data,size_t len,void ** buf,size_t * buf_size)723 static upb_MiniTable* upb_MtDecoder_DoBuildMiniTableWithBuf(
724 upb_MtDecoder* decoder, const char* data, size_t len, void** buf,
725 size_t* buf_size) {
726 upb_MdDecoder_CheckOutOfMemory(&decoder->base, decoder->table);
727
728 decoder->table->UPB_PRIVATE(size) = kUpb_Reserved_Hasbytes;
729 decoder->table->UPB_PRIVATE(field_count) = 0;
730 decoder->table->UPB_PRIVATE(ext) = kUpb_ExtMode_NonExtendable;
731 decoder->table->UPB_PRIVATE(dense_below) = 0;
732 decoder->table->UPB_PRIVATE(table_mask) = -1;
733 decoder->table->UPB_PRIVATE(required_count) = 0;
734
735 // Strip off and verify the version tag.
736 if (!len--) goto done;
737 const char vers = *data++;
738
739 switch (vers) {
740 case kUpb_EncodedVersion_MapV1:
741 upb_MtDecoder_ParseMap(decoder, data, len);
742 break;
743
744 case kUpb_EncodedVersion_MessageV1:
745 upb_MtDecoder_ParseMessage(decoder, data, len);
746 upb_MtDecoder_AssignHasbits(decoder);
747 upb_MtDecoder_SortLayoutItems(decoder);
748 upb_MtDecoder_AssignOffsets(decoder);
749 break;
750
751 case kUpb_EncodedVersion_MessageSetV1:
752 upb_MtDecoder_ParseMessageSet(decoder, data, len);
753 break;
754
755 default:
756 upb_MdDecoder_ErrorJmp(&decoder->base, "Invalid message version: %c",
757 vers);
758 }
759
760 done:
761 *buf = decoder->vec.data;
762 *buf_size = decoder->vec.capacity * sizeof(*decoder->vec.data);
763 return decoder->table;
764 }
765
upb_MtDecoder_BuildMiniTableWithBuf(upb_MtDecoder * const decoder,const char * const data,const size_t len,void ** const buf,size_t * const buf_size)766 static upb_MiniTable* upb_MtDecoder_BuildMiniTableWithBuf(
767 upb_MtDecoder* const decoder, const char* const data, const size_t len,
768 void** const buf, size_t* const buf_size) {
769 if (UPB_SETJMP(decoder->base.err) != 0) {
770 *buf = decoder->vec.data;
771 *buf_size = decoder->vec.capacity * sizeof(*decoder->vec.data);
772 return NULL;
773 }
774
775 return upb_MtDecoder_DoBuildMiniTableWithBuf(decoder, data, len, buf,
776 buf_size);
777 }
778
upb_MiniTable_BuildWithBuf(const char * data,size_t len,upb_MiniTablePlatform platform,upb_Arena * arena,void ** buf,size_t * buf_size,upb_Status * status)779 upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len,
780 upb_MiniTablePlatform platform,
781 upb_Arena* arena, void** buf,
782 size_t* buf_size,
783 upb_Status* status) {
784 upb_MtDecoder decoder = {
785 .base = {.status = status},
786 .platform = platform,
787 .vec =
788 {
789 .data = *buf,
790 .capacity = *buf_size / sizeof(*decoder.vec.data),
791 .size = 0,
792 },
793 .arena = arena,
794 .table = upb_Arena_Malloc(arena, sizeof(*decoder.table)),
795 };
796
797 return upb_MtDecoder_BuildMiniTableWithBuf(&decoder, data, len, buf,
798 buf_size);
799 }
800
// Fills `ext` from the extension mini-descriptor in `data`/`len`.
//
// A non-empty input must begin with the extension version tag. Exactly one
// field must be decoded. An extension of a MessageSet must be a non-repeated
// sub-message. Returns the position after the consumed input, or NULL when
// one of these structural checks fails (no status message is set in that
// case).
static const char* upb_MtDecoder_DoBuildMiniTableExtension(
    upb_MtDecoder* decoder, const char* data, size_t len,
    upb_MiniTableExtension* ext, const upb_MiniTable* extendee,
    upb_MiniTableSub sub) {
  // If the string is non-empty then it must begin with a version tag.
  if (len != 0) {
    if (*data != kUpb_EncodedVersion_ExtensionV1) {
      upb_MdDecoder_ErrorJmp(&decoder->base, "Invalid ext version: %c", *data);
    }
    data += 1;
    len -= 1;
  }

  uint16_t field_n = 0;
  upb_SubCounts sub_counts = {0, 0};
  const char* end = upb_MtDecoder_Parse(decoder, data, len, ext, sizeof(*ext),
                                        &field_n, &sub_counts);
  if (end == NULL || field_n != 1) return NULL;

  // Extension fields have no offset or presence of their own; clear the
  // provisional values left by the parser.
  upb_MiniTableField* f = &ext->UPB_PRIVATE(field);
  f->UPB_PRIVATE(mode) |= kUpb_LabelFlags_IsExtension;
  f->UPB_PRIVATE(offset) = 0;
  f->presence = 0;

  const bool extends_message_set =
      (extendee->UPB_PRIVATE(ext) & kUpb_ExtMode_IsMessageSet) != 0;
  if (extends_message_set) {
    // Extensions of MessageSet must be messages and must be non-repeating.
    if (!upb_MiniTableField_IsSubMessage(f) || upb_MiniTableField_IsArray(f)) {
      return NULL;
    }
  }

  ext->UPB_PRIVATE(extendee) = extendee;
  ext->UPB_PRIVATE(sub) = sub;

  return end;
}
839
// Error-handling wrapper around upb_MtDecoder_DoBuildMiniTableExtension():
// establishes the UPB_SETJMP target and returns NULL when an error is raised.
static const char* upb_MtDecoder_BuildMiniTableExtension(
    upb_MtDecoder* const decoder, const char* const data, const size_t len,
    upb_MiniTableExtension* const ext, const upb_MiniTable* const extendee,
    const upb_MiniTableSub sub) {
  if (UPB_SETJMP(decoder->base.err) == 0) {
    return upb_MtDecoder_DoBuildMiniTableExtension(decoder, data, len, ext,
                                                   extendee, sub);
  }
  // Reached only through an error jump.
  return NULL;
}
848
// Initializes the caller-provided `ext` from an extension mini-descriptor.
//
// No arena and no table are used for extensions. Returns the position after
// the consumed input, or NULL on failure; decoding errors are reported
// through `status`.
const char* _upb_MiniTableExtension_Init(const char* data, size_t len,
                                         upb_MiniTableExtension* ext,
                                         const upb_MiniTable* extendee,
                                         upb_MiniTableSub sub,
                                         upb_MiniTablePlatform platform,
                                         upb_Status* status) {
  // A NULL table is what marks "decoding an extension" for the parser.
  upb_MtDecoder decoder = {
      .base = {.status = status},
      .table = NULL,
      .platform = platform,
      .arena = NULL,
  };

  const char* end = upb_MtDecoder_BuildMiniTableExtension(
      &decoder, data, len, ext, extendee, sub);
  return end;
}
865
_upb_MiniTableExtension_Build(const char * data,size_t len,const upb_MiniTable * extendee,upb_MiniTableSub sub,upb_MiniTablePlatform platform,upb_Arena * arena,upb_Status * status)866 upb_MiniTableExtension* _upb_MiniTableExtension_Build(
867 const char* data, size_t len, const upb_MiniTable* extendee,
868 upb_MiniTableSub sub, upb_MiniTablePlatform platform, upb_Arena* arena,
869 upb_Status* status) {
870 upb_MiniTableExtension* ext =
871 upb_Arena_Malloc(arena, sizeof(upb_MiniTableExtension));
872 if (UPB_UNLIKELY(!ext)) return NULL;
873
874 const char* ptr = _upb_MiniTableExtension_Init(data, len, ext, extendee, sub,
875 platform, status);
876 if (UPB_UNLIKELY(!ptr)) return NULL;
877
878 return ext;
879 }
880
_upb_MiniTable_Build(const char * data,size_t len,upb_MiniTablePlatform platform,upb_Arena * arena,upb_Status * status)881 upb_MiniTable* _upb_MiniTable_Build(const char* data, size_t len,
882 upb_MiniTablePlatform platform,
883 upb_Arena* arena, upb_Status* status) {
884 void* buf = NULL;
885 size_t size = 0;
886 upb_MiniTable* ret = upb_MiniTable_BuildWithBuf(data, len, platform, arena,
887 &buf, &size, status);
888 free(buf);
889 return ret;
890 }
891