1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2023 Google LLC. All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7
8 #include "upb/mini_descriptor/internal/encode.h"
9
10 #include <assert.h>
11 #include <stddef.h>
12 #include <stdint.h>
13
14 #include "upb/base/internal/log2.h"
15 #include "upb/mini_descriptor/internal/base92.h"
16 #include "upb/mini_descriptor/internal/modifiers.h"
17 #include "upb/mini_descriptor/internal/wire_constants.h"
18
19 // Must be last.
20 #include "upb/port/def.inc"
21
// Bookkeeping kept while an enum mini descriptor is being written.
typedef struct {
  // Pending values not yet written out: bit i stands for the value
  // (last_written_value + i).  Cleared whenever the mask is flushed.
  uint64_t present_values_mask;

  // Base value that bit 0 of present_values_mask is relative to.
  uint32_t last_written_value;
} upb_MtDataEncoderInternal_EnumState;
26
// How far oneof emission has progressed for the current message.
typedef enum {
  kUpb_OneofState_NotStarted,
  kUpb_OneofState_StartedOneof,
  kUpb_OneofState_EmittedOneofField,
} upb_MtDataEncoderInternal_OneofState;

// Bookkeeping kept while a message (or map/extension) mini descriptor is
// being written.
typedef struct {
  // Message-level modifiers; consulted when deciding which per-field modifier
  // bits need to be emitted.
  uint64_t msg_modifiers;

  // Number of the most recently emitted field (0 before the first field).
  uint32_t last_field_num;

  // Determines which separator precedes the next oneof / oneof field.
  upb_MtDataEncoderInternal_OneofState oneof_state;
} upb_MtDataEncoderInternal_MsgState;
36
37 typedef struct {
38 char* buf_start; // Only for checking kUpb_MtDataEncoder_MinSize.
39 union {
40 upb_MtDataEncoderInternal_EnumState enum_state;
41 upb_MtDataEncoderInternal_MsgState msg_state;
42 } state;
43 } upb_MtDataEncoderInternal;
44
upb_MtDataEncoder_GetInternal(upb_MtDataEncoder * e,char * buf_start)45 static upb_MtDataEncoderInternal* upb_MtDataEncoder_GetInternal(
46 upb_MtDataEncoder* e, char* buf_start) {
47 UPB_ASSERT(sizeof(upb_MtDataEncoderInternal) <= sizeof(e->internal));
48 upb_MtDataEncoderInternal* ret = (upb_MtDataEncoderInternal*)e->internal;
49 ret->buf_start = buf_start;
50 return ret;
51 }
52
// Writes the byte `ch` verbatim at `ptr`.
//
// Returns the position just past the written byte, or NULL (writing nothing)
// when `ptr` has reached `e->end`.
static char* upb_MtDataEncoder_PutRaw(upb_MtDataEncoder* e, char* ptr,
                                      char ch) {
  upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal;

  // A single API call must never emit kUpb_MtDataEncoder_MinSize or more bytes.
  UPB_ASSERT(ptr - in->buf_start < kUpb_MtDataEncoder_MinSize);

  if (ptr != e->end) {
    *ptr = ch;
    return ptr + 1;
  }
  return NULL;
}
61
// Translates `ch` through _upb_ToBase92() and writes the resulting character.
// Returns the advanced pointer, or NULL if the write failed.
static char* upb_MtDataEncoder_Put(upb_MtDataEncoder* e, char* ptr, char ch) {
  const char encoded = _upb_ToBase92(ch);
  return upb_MtDataEncoder_PutRaw(e, ptr, encoded);
}
65
// Writes `val` as a little-endian sequence of characters, each drawn from the
// range bounded by the encoded characters `min` and `max`.
//
// At least one character is always written, even for val == 0.  Returns the
// advanced pointer, or NULL as soon as any single character write fails.
static char* upb_MtDataEncoder_PutBase92Varint(upb_MtDataEncoder* e, char* ptr,
                                               uint32_t val, int min, int max) {
  // Number of payload bits each character of the range can carry.
  const int bits_per_char =
      upb_Log2Ceiling(_upb_FromBase92(max) - _upb_FromBase92(min) + 1);
  UPB_ASSERT(bits_per_char <= 6);
  const uint32_t low_bits_mask = (1 << bits_per_char) - 1;

  do {
    const uint32_t chunk = val & low_bits_mask;
    ptr = upb_MtDataEncoder_Put(e, ptr, chunk + _upb_FromBase92(min));
    if (ptr == NULL) return NULL;
    val >>= bits_per_char;
  } while (val != 0);

  return ptr;
}
79
// Emits the modifier bits `mod` as a varint in the modifier character range.
// A zero `mod` emits nothing and returns `ptr` unchanged.  Otherwise returns
// the advanced pointer, or NULL if the write failed.
char* upb_MtDataEncoder_PutModifier(upb_MtDataEncoder* e, char* ptr,
                                    uint64_t mod) {
  if (!mod) return ptr;

  return upb_MtDataEncoder_PutBase92Varint(e, ptr, mod,
                                           kUpb_EncodedValue_MinModifier,
                                           kUpb_EncodedValue_MaxModifier);
}
89
// Encodes a complete extension mini descriptor: the extension version tag
// followed by the single field (type, field_num, field_mod).
//
// Resets the message state first.  Returns the advanced pointer, or NULL if
// any write failed.
char* upb_MtDataEncoder_EncodeExtension(upb_MtDataEncoder* e, char* ptr,
                                        upb_FieldType type, uint32_t field_num,
                                        uint64_t field_mod) {
  upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
  upb_MtDataEncoderInternal_MsgState* msg = &in->state.msg_state;
  msg->oneof_state = kUpb_OneofState_NotStarted;
  msg->last_field_num = 0;
  msg->msg_modifiers = 0;

  char* next =
      upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_ExtensionV1);
  if (next == NULL) return NULL;

  return upb_MtDataEncoder_PutField(e, next, type, field_num, field_mod);
}
103
// Encodes a complete map-entry mini descriptor: the map version tag, then the
// key as field 1 and the value as field 2.
//
// Resets the message state first.  Returns the advanced pointer, or NULL if
// any write failed.
char* upb_MtDataEncoder_EncodeMap(upb_MtDataEncoder* e, char* ptr,
                                  upb_FieldType key_type,
                                  upb_FieldType value_type, uint64_t key_mod,
                                  uint64_t value_mod) {
  upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
  upb_MtDataEncoderInternal_MsgState* msg = &in->state.msg_state;
  msg->oneof_state = kUpb_OneofState_NotStarted;
  msg->last_field_num = 0;
  msg->msg_modifiers = 0;

  char* out = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MapV1);
  if (out == NULL) return NULL;

  out = upb_MtDataEncoder_PutField(e, out, key_type, 1, key_mod);
  if (out == NULL) return NULL;

  return upb_MtDataEncoder_PutField(e, out, value_type, 2, value_mod);
}
121
// Encodes a MessageSet mini descriptor, which consists solely of the
// MessageSet version tag.  Returns the advanced pointer, or NULL if the write
// failed.
char* upb_MtDataEncoder_EncodeMessageSet(upb_MtDataEncoder* e, char* ptr) {
  // Called only for its side effect of recording the buffer start.
  (void)upb_MtDataEncoder_GetInternal(e, ptr);

  char* out = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MessageSetV1);
  return out;
}
126
// Begins a message mini descriptor: resets the message state, remembers
// `msg_mod`, then emits the message version tag followed by the message
// modifiers (if any are set).
//
// Returns the advanced pointer, or NULL if any write failed.
char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr,
                                     uint64_t msg_mod) {
  upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
  upb_MtDataEncoderInternal_MsgState* msg = &in->state.msg_state;
  msg->oneof_state = kUpb_OneofState_NotStarted;
  msg->last_field_num = 0;
  msg->msg_modifiers = msg_mod;

  char* out = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MessageV1);
  return out ? upb_MtDataEncoder_PutModifier(e, out, msg_mod) : NULL;
}
139
// Emits a skip record when `field_num` does not immediately follow the last
// emitted field number, then records `field_num` as the last field.
//
// Field numbers must be strictly increasing; otherwise NULL is returned.
// NULL is also returned (leaving the last field number untouched) if the
// skip could not be written.
static char* _upb_MtDataEncoder_MaybePutFieldSkip(upb_MtDataEncoder* e,
                                                  char* ptr,
                                                  uint32_t field_num) {
  upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal;
  upb_MtDataEncoderInternal_MsgState* msg = &in->state.msg_state;

  if (field_num <= msg->last_field_num) return NULL;

  const int is_consecutive = (msg->last_field_num + 1 == field_num);
  if (!is_consecutive) {
    // Put skip.
    UPB_ASSERT(field_num > msg->last_field_num);
    const uint32_t gap = field_num - msg->last_field_num;
    ptr = upb_MtDataEncoder_PutBase92Varint(
        e, ptr, gap, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip);
    if (ptr == NULL) return NULL;
  }

  msg->last_field_num = field_num;
  return ptr;
}
156
// Emits the single character that encodes a field's type.
//
// The base code comes from a lookup on `type`; a closed enum overrides it, and
// a repeated field is shifted up by kUpb_EncodedType_RepeatedBase.  Returns
// the advanced pointer, or NULL if the write failed.
static char* _upb_MtDataEncoder_PutFieldType(upb_MtDataEncoder* e, char* ptr,
                                             upb_FieldType type,
                                             uint64_t field_mod) {
  static const char kUpb_TypeToEncoded[] = {
      [kUpb_FieldType_Double] = kUpb_EncodedType_Double,
      [kUpb_FieldType_Float] = kUpb_EncodedType_Float,
      [kUpb_FieldType_Int64] = kUpb_EncodedType_Int64,
      [kUpb_FieldType_UInt64] = kUpb_EncodedType_UInt64,
      [kUpb_FieldType_Int32] = kUpb_EncodedType_Int32,
      [kUpb_FieldType_Fixed64] = kUpb_EncodedType_Fixed64,
      [kUpb_FieldType_Fixed32] = kUpb_EncodedType_Fixed32,
      [kUpb_FieldType_Bool] = kUpb_EncodedType_Bool,
      [kUpb_FieldType_String] = kUpb_EncodedType_String,
      [kUpb_FieldType_Group] = kUpb_EncodedType_Group,
      [kUpb_FieldType_Message] = kUpb_EncodedType_Message,
      [kUpb_FieldType_Bytes] = kUpb_EncodedType_Bytes,
      [kUpb_FieldType_UInt32] = kUpb_EncodedType_UInt32,
      [kUpb_FieldType_Enum] = kUpb_EncodedType_OpenEnum,
      [kUpb_FieldType_SFixed32] = kUpb_EncodedType_SFixed32,
      [kUpb_FieldType_SFixed64] = kUpb_EncodedType_SFixed64,
      [kUpb_FieldType_SInt32] = kUpb_EncodedType_SInt32,
      [kUpb_FieldType_SInt64] = kUpb_EncodedType_SInt64,
  };

  int code;
  if (field_mod & kUpb_FieldModifier_IsClosedEnum) {
    // Only enum fields may carry the closed-enum modifier.
    UPB_ASSERT(type == kUpb_FieldType_Enum);
    code = kUpb_EncodedType_ClosedEnum;
  } else {
    code = kUpb_TypeToEncoded[type];
  }

  // Repeated fields shift the type number up (unlike other modifiers which
  // are bit flags).
  if (field_mod & kUpb_FieldModifier_IsRepeated) {
    code += kUpb_EncodedType_RepeatedBase;
  }

  return upb_MtDataEncoder_Put(e, ptr, code);
}
196
// Computes the encoded per-field modifier bits for a field and emits them
// (nothing is emitted when no bit ends up set).
//
// Packed-ness and UTF-8 validation are encoded as "flip" bits relative to the
// message-level defaults stored by the enclosing message.  Returns the
// advanced pointer, or NULL if the write failed.
static char* _upb_MtDataEncoder_MaybePutModifiers(upb_MtDataEncoder* e,
                                                  char* ptr, upb_FieldType type,
                                                  uint64_t field_mod) {
  upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal;
  const uint64_t msg_mod = in->state.msg_state.msg_modifiers;
  uint32_t out_bits = 0;

  // Packed-ness only matters for repeated fields of a packable type.
  if ((field_mod & kUpb_FieldModifier_IsRepeated) &&
      upb_FieldType_IsPackable(type)) {
    bool wants_packed = field_mod & kUpb_FieldModifier_IsPacked;
    bool msg_default_packed = msg_mod & kUpb_MessageModifier_DefaultIsPacked;
    if (wants_packed != msg_default_packed) {
      out_bits |= kUpb_EncodedFieldModifier_FlipPacked;
    }
  }

  if (type == kUpb_FieldType_String) {
    bool wants_utf8_check = field_mod & kUpb_FieldModifier_ValidateUtf8;
    bool msg_utf8_check = msg_mod & kUpb_MessageModifier_ValidateUtf8;
    if (wants_utf8_check != msg_utf8_check) {
      // Old binaries do not recognize the field modifier. We need the failure
      // mode to be too lax rather than too strict. Our caller should have
      // handled this (see _upb_MessageDef_ValidateUtf8()).
      assert(!msg_utf8_check);
      out_bits |= kUpb_EncodedFieldModifier_FlipValidateUtf8;
    }
  }

  // The remaining modifiers map one-to-one onto encoded bits.
  if (field_mod & kUpb_FieldModifier_IsProto3Singular) {
    out_bits |= kUpb_EncodedFieldModifier_IsProto3Singular;
  }
  if (field_mod & kUpb_FieldModifier_IsRequired) {
    out_bits |= kUpb_EncodedFieldModifier_IsRequired;
  }

  return upb_MtDataEncoder_PutModifier(e, ptr, out_bits);
}
235
// Emits one field: an optional skip (when field numbers are not consecutive),
// the field type character, and any field modifiers.
//
// Returns the advanced pointer, or NULL if `field_num` is not greater than the
// previously emitted field number or if any write failed.
char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr,
                                 upb_FieldType type, uint32_t field_num,
                                 uint64_t field_mod) {
  // Called only to record the buffer start for this API call.
  upb_MtDataEncoder_GetInternal(e, ptr);

  char* out = _upb_MtDataEncoder_MaybePutFieldSkip(e, ptr, field_num);
  if (out == NULL) return NULL;

  out = _upb_MtDataEncoder_PutFieldType(e, out, type, field_mod);
  if (out == NULL) return NULL;

  return _upb_MtDataEncoder_MaybePutModifiers(e, out, type, field_mod);
}
249
// Begins a new oneof.  The first oneof of a message is introduced by the End
// marker; every later oneof is introduced by the oneof separator.
//
// The oneof state becomes StartedOneof regardless of whether the write
// succeeded.  Returns the advanced pointer, or NULL if the write failed.
char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* ptr) {
  upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);

  const int is_first_oneof =
      (in->state.msg_state.oneof_state == kUpb_OneofState_NotStarted);
  const int marker = is_first_oneof ? kUpb_EncodedValue_End
                                    : kUpb_EncodedValue_OneofSeparator;

  char* out = upb_MtDataEncoder_Put(e, ptr, _upb_FromBase92(marker));
  in->state.msg_state.oneof_state = kUpb_OneofState_StartedOneof;
  return out;
}
261
// Adds `field_num` to the oneof currently being emitted.  Every member after
// the first is preceded by a field separator.
//
// Returns the advanced pointer, or NULL if a write failed.  When the separator
// write fails the oneof state is left untouched; otherwise it becomes
// EmittedOneofField.
char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr,
                                      uint32_t field_num) {
  upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
  upb_MtDataEncoderInternal_MsgState* msg = &in->state.msg_state;

  if (msg->oneof_state == kUpb_OneofState_EmittedOneofField) {
    ptr = upb_MtDataEncoder_Put(
        e, ptr, _upb_FromBase92(kUpb_EncodedValue_FieldSeparator));
    if (ptr == NULL) return NULL;
  }

  char* out = upb_MtDataEncoder_PutBase92Varint(
      e, ptr, field_num, _upb_ToBase92(0), _upb_ToBase92(63));
  msg->oneof_state = kUpb_OneofState_EmittedOneofField;
  return out;
}
275
// Begins an enum mini descriptor: clears the enum state and emits the enum
// version tag.  Returns the advanced pointer, or NULL if the write failed.
char* upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e, char* ptr) {
  upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
  upb_MtDataEncoderInternal_EnumState* st = &in->state.enum_state;
  st->last_written_value = 0;
  st->present_values_mask = 0;

  return upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_EnumV1);
}
283
// Emits the pending value mask as one character, clears it, and advances the
// base value by 5 (the number of values one mask character covers).
//
// The state is updated even if the write fails.  Returns the advanced pointer,
// or NULL if the write failed.
static char* upb_MtDataEncoder_FlushDenseEnumMask(upb_MtDataEncoder* e,
                                                  char* ptr) {
  upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal;
  upb_MtDataEncoderInternal_EnumState* st = &in->state.enum_state;

  char* out = upb_MtDataEncoder_Put(e, ptr, st->present_values_mask);
  st->present_values_mask = 0;
  st->last_written_value += 5;
  return out;
}
292
// Adds the enum value `val` to the enum mini descriptor being built.
//
// Values are accumulated in a small bitmask relative to the last written base
// value.  When `val` is 5 or more past that base, any pending mask is flushed
// first, and a remaining gap of 5 or more is emitted as an explicit skip.
//
// `val` must not be smaller than the current base value (asserted), and must
// be larger than every value already pending in the mask (asserted).
//
// Returns the advanced pointer, or NULL if a write failed.  On a failed skip
// write the base value and mask are left as they were before the skip, so the
// bookkeeping never claims a skip that was not actually emitted.
char* upb_MtDataEncoder_PutEnumValue(upb_MtDataEncoder* e, char* ptr,
                                     uint32_t val) {
  // TODO: optimize this encoding.
  upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
  UPB_ASSERT(val >= in->state.enum_state.last_written_value);
  uint32_t delta = val - in->state.enum_state.last_written_value;

  // The value does not fit in the current mask window: write the window out.
  // Flushing advances the base value by 5, so the distance shrinks by 5.
  if (delta >= 5 && in->state.enum_state.present_values_mask) {
    ptr = upb_MtDataEncoder_FlushDenseEnumMask(e, ptr);
    if (!ptr) {
      return NULL;
    }
    delta -= 5;
  }

  // Still too far away: emit an explicit skip so the value lands on bit 0.
  if (delta >= 5) {
    ptr = upb_MtDataEncoder_PutBase92Varint(
        e, ptr, delta, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip);
    // Bail out before touching the state if the skip could not be written,
    // mirroring the check after the flush above.
    if (!ptr) {
      return NULL;
    }
    in->state.enum_state.last_written_value += delta;
    delta = 0;
  }

  // No bit at or above the new one may already be set.
  UPB_ASSERT((in->state.enum_state.present_values_mask >> delta) == 0);
  in->state.enum_state.present_values_mask |= 1ULL << delta;
  return ptr;
}
318
// Finishes an enum mini descriptor by flushing any values still pending in the
// mask.  Returns `ptr` unchanged when nothing is pending; otherwise returns
// the advanced pointer, or NULL if the flush failed.
char* upb_MtDataEncoder_EndEnum(upb_MtDataEncoder* e, char* ptr) {
  upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
  const int has_pending = (in->state.enum_state.present_values_mask != 0);
  return has_pending ? upb_MtDataEncoder_FlushDenseEnumMask(e, ptr) : ptr;
}
324