xref: /aosp_15_r20/external/grpc-grpc/third_party/upb/upb/mini_descriptor/internal/encode.c (revision cc02d7e222339f7a4f6ba5f422e6413f4bd931f2)
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2023 Google LLC.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 #include "upb/mini_descriptor/internal/encode.h"
9 
10 #include <assert.h>
11 #include <stddef.h>
12 #include <stdint.h>
13 
14 #include "upb/base/internal/log2.h"
15 #include "upb/mini_descriptor/internal/base92.h"
16 #include "upb/mini_descriptor/internal/modifiers.h"
17 #include "upb/mini_descriptor/internal/wire_constants.h"
18 
19 // Must be last.
20 #include "upb/port/def.inc"
21 
// Encoder state used while an enum mini-descriptor is being emitted.
typedef struct {
  // Pending bitmask of enum values not yet written out.  Bit `i` stands for
  // the value `last_written_value + i`.
  uint64_t present_values_mask;

  // Base value that bit 0 of `present_values_mask` is relative to.
  uint32_t last_written_value;
} upb_MtDataEncoderInternal_EnumState;
26 
// Encoder state used while a message (or map / extension) mini-descriptor is
// being emitted.
typedef struct {
  // Message-level modifiers supplied when the message was started; consulted
  // when deciding which per-field modifiers need to be emitted.
  uint64_t msg_modifiers;

  // Number of the most recently emitted field (0 before the first field).
  uint32_t last_field_num;

  // Progress through the oneof section of the descriptor.
  enum {
    kUpb_OneofState_NotStarted = 0,
    kUpb_OneofState_StartedOneof = 1,
    kUpb_OneofState_EmittedOneofField = 2,
  } oneof_state;
} upb_MtDataEncoderInternal_MsgState;
36 
37 typedef struct {
38   char* buf_start;  // Only for checking kUpb_MtDataEncoder_MinSize.
39   union {
40     upb_MtDataEncoderInternal_EnumState enum_state;
41     upb_MtDataEncoderInternal_MsgState msg_state;
42   } state;
43 } upb_MtDataEncoderInternal;
44 
upb_MtDataEncoder_GetInternal(upb_MtDataEncoder * e,char * buf_start)45 static upb_MtDataEncoderInternal* upb_MtDataEncoder_GetInternal(
46     upb_MtDataEncoder* e, char* buf_start) {
47   UPB_ASSERT(sizeof(upb_MtDataEncoderInternal) <= sizeof(e->internal));
48   upb_MtDataEncoderInternal* ret = (upb_MtDataEncoderInternal*)e->internal;
49   ret->buf_start = buf_start;
50   return ret;
51 }
52 
// Writes the byte `ch` verbatim at `ptr`.
//
// Returns the position just past the written byte, or NULL (writing nothing)
// when `ptr` has reached `e->end`.
static char* upb_MtDataEncoder_PutRaw(upb_MtDataEncoder* e, char* ptr,
                                      char ch) {
  upb_MtDataEncoderInternal* state = (upb_MtDataEncoderInternal*)e->internal;
  // A single call must not emit more than kUpb_MtDataEncoder_MinSize bytes.
  UPB_ASSERT(ptr - state->buf_start < kUpb_MtDataEncoder_MinSize);

  if (ptr != e->end) {
    *ptr = ch;
    return ptr + 1;
  }
  return NULL;
}
61 
// Converts `ch` to its base-92 character and writes it at `ptr`.
// Returns the new write position, or NULL if the buffer is full.
static char* upb_MtDataEncoder_Put(upb_MtDataEncoder* e, char* ptr, char ch) {
  const char encoded = _upb_ToBase92(ch);
  return upb_MtDataEncoder_PutRaw(e, ptr, encoded);
}
65 
// Writes `val` as a little-endian varint whose digits are base-92 characters
// drawn from the range [`min`, `max`] (both given as encoded characters).
//
// At least one character is always written, even for `val == 0`.  Returns the
// new write position, or NULL if the buffer filled up.
static char* upb_MtDataEncoder_PutBase92Varint(upb_MtDataEncoder* e, char* ptr,
                                               uint32_t val, int min, int max) {
  const int lowest = _upb_FromBase92(min);
  // Number of payload bits carried by each emitted character.
  const int bits_per_char = upb_Log2Ceiling(_upb_FromBase92(max) - lowest + 1);
  UPB_ASSERT(bits_per_char <= 6);
  const uint32_t digit_mask = (1 << bits_per_char) - 1;

  for (;;) {
    const uint32_t digit = val & digit_mask;
    ptr = upb_MtDataEncoder_Put(e, ptr, digit + lowest);
    if (!ptr) return NULL;
    val >>= bits_per_char;
    if (!val) return ptr;
  }
}
79 
// Emits the modifier bits `mod` as a base-92 varint in the modifier character
// range.  A zero `mod` emits nothing and returns `ptr` unchanged.
// Returns NULL if the buffer filled up.
char* upb_MtDataEncoder_PutModifier(upb_MtDataEncoder* e, char* ptr,
                                    uint64_t mod) {
  if (!mod) return ptr;

  return upb_MtDataEncoder_PutBase92Varint(e, ptr, mod,
                                           kUpb_EncodedValue_MinModifier,
                                           kUpb_EncodedValue_MaxModifier);
}
89 
// Encodes a complete extension mini-descriptor: the extension version tag
// followed by the single field described by `type`, `field_num` and
// `field_mod`.  Returns the new write position, or NULL if the buffer filled
// up.
char* upb_MtDataEncoder_EncodeExtension(upb_MtDataEncoder* e, char* ptr,
                                        upb_FieldType type, uint32_t field_num,
                                        uint64_t field_mod) {
  // Start from a clean message state with no message-level modifiers.
  upb_MtDataEncoderInternal_MsgState* msg =
      &upb_MtDataEncoder_GetInternal(e, ptr)->state.msg_state;
  msg->msg_modifiers = 0;
  msg->last_field_num = 0;
  msg->oneof_state = kUpb_OneofState_NotStarted;

  ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_ExtensionV1);
  if (ptr) {
    ptr = upb_MtDataEncoder_PutField(e, ptr, type, field_num, field_mod);
  }
  return ptr;
}
103 
// Encodes a complete map-entry mini-descriptor: the map version tag, then the
// key as field 1 and the value as field 2.  Returns the new write position,
// or NULL if the buffer filled up.
char* upb_MtDataEncoder_EncodeMap(upb_MtDataEncoder* e, char* ptr,
                                  upb_FieldType key_type,
                                  upb_FieldType value_type, uint64_t key_mod,
                                  uint64_t value_mod) {
  // Start from a clean message state with no message-level modifiers.
  upb_MtDataEncoderInternal_MsgState* msg =
      &upb_MtDataEncoder_GetInternal(e, ptr)->state.msg_state;
  msg->msg_modifiers = 0;
  msg->last_field_num = 0;
  msg->oneof_state = kUpb_OneofState_NotStarted;

  ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MapV1);
  if (ptr) ptr = upb_MtDataEncoder_PutField(e, ptr, key_type, 1, key_mod);
  if (ptr) ptr = upb_MtDataEncoder_PutField(e, ptr, value_type, 2, value_mod);
  return ptr;
}
121 
// Encodes a MessageSet mini-descriptor, which consists solely of the
// MessageSet version tag.  Returns the new write position, or NULL if the
// buffer is full.
char* upb_MtDataEncoder_EncodeMessageSet(upb_MtDataEncoder* e, char* ptr) {
  // Called only for its side effect of recording the buffer start.
  (void)upb_MtDataEncoder_GetInternal(e, ptr);

  char* next = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MessageSetV1);
  return next;
}
126 
// Begins a message mini-descriptor: resets the message state, remembers
// `msg_mod` for later per-field decisions, then emits the message version tag
// followed by the message modifiers (if any).  Returns the new write
// position, or NULL if the buffer filled up.
char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr,
                                     uint64_t msg_mod) {
  upb_MtDataEncoderInternal_MsgState* msg =
      &upb_MtDataEncoder_GetInternal(e, ptr)->state.msg_state;
  msg->msg_modifiers = msg_mod;
  msg->last_field_num = 0;
  msg->oneof_state = kUpb_OneofState_NotStarted;

  ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MessageV1);
  if (ptr) ptr = upb_MtDataEncoder_PutModifier(e, ptr, msg_mod);
  return ptr;
}
139 
// Emits a "skip" varint when `field_num` does not directly follow the
// previously emitted field number, then records `field_num` as the last one
// written.
//
// Returns NULL if `field_num` is not strictly greater than the previous field
// number, or if the buffer filled up; otherwise the new write position.
static char* _upb_MtDataEncoder_MaybePutFieldSkip(upb_MtDataEncoder* e,
                                                  char* ptr,
                                                  uint32_t field_num) {
  upb_MtDataEncoderInternal_MsgState* msg =
      &((upb_MtDataEncoderInternal*)e->internal)->state.msg_state;
  const uint32_t prev = msg->last_field_num;

  // Field numbers must be strictly increasing.
  if (field_num <= prev) return NULL;

  const uint32_t gap = field_num - prev;
  if (gap != 1) {
    // Put skip.
    UPB_ASSERT(field_num > prev);
    ptr = upb_MtDataEncoder_PutBase92Varint(
        e, ptr, gap, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip);
    if (!ptr) return NULL;
  }

  msg->last_field_num = field_num;
  return ptr;
}
156 
// Emits the single character that encodes a field's type.
//
// The base encoding comes from a lookup on `type`; a closed enum overrides it,
// and a repeated field offsets it by kUpb_EncodedType_RepeatedBase.  Returns
// the new write position, or NULL if the buffer is full.
static char* _upb_MtDataEncoder_PutFieldType(upb_MtDataEncoder* e, char* ptr,
                                             upb_FieldType type,
                                             uint64_t field_mod) {
  // Indexed by upb_FieldType.
  static const char kEncodedTypeByFieldType[] = {
      [kUpb_FieldType_Double] = kUpb_EncodedType_Double,
      [kUpb_FieldType_Float] = kUpb_EncodedType_Float,
      [kUpb_FieldType_Int64] = kUpb_EncodedType_Int64,
      [kUpb_FieldType_UInt64] = kUpb_EncodedType_UInt64,
      [kUpb_FieldType_Int32] = kUpb_EncodedType_Int32,
      [kUpb_FieldType_Fixed64] = kUpb_EncodedType_Fixed64,
      [kUpb_FieldType_Fixed32] = kUpb_EncodedType_Fixed32,
      [kUpb_FieldType_Bool] = kUpb_EncodedType_Bool,
      [kUpb_FieldType_String] = kUpb_EncodedType_String,
      [kUpb_FieldType_Group] = kUpb_EncodedType_Group,
      [kUpb_FieldType_Message] = kUpb_EncodedType_Message,
      [kUpb_FieldType_Bytes] = kUpb_EncodedType_Bytes,
      [kUpb_FieldType_UInt32] = kUpb_EncodedType_UInt32,
      [kUpb_FieldType_Enum] = kUpb_EncodedType_OpenEnum,
      [kUpb_FieldType_SFixed32] = kUpb_EncodedType_SFixed32,
      [kUpb_FieldType_SFixed64] = kUpb_EncodedType_SFixed64,
      [kUpb_FieldType_SInt32] = kUpb_EncodedType_SInt32,
      [kUpb_FieldType_SInt64] = kUpb_EncodedType_SInt64,
  };

  int encoded = kEncodedTypeByFieldType[type];

  // Enums default to the open encoding; the modifier selects the closed one.
  if (field_mod & kUpb_FieldModifier_IsClosedEnum) {
    UPB_ASSERT(type == kUpb_FieldType_Enum);
    encoded = kUpb_EncodedType_ClosedEnum;
  }

  // Repeated fields shift the type number up (unlike other modifiers which
  // are bit flags).
  const int repeated_offset =
      (field_mod & kUpb_FieldModifier_IsRepeated) ? kUpb_EncodedType_RepeatedBase
                                                  : 0;

  return upb_MtDataEncoder_Put(e, ptr, encoded + repeated_offset);
}
196 
// Translates the caller-facing field modifiers in `field_mod` into the
// encoded per-field modifier bits and emits them (nothing is emitted when no
// bit ends up set).
//
// Packedness and UTF-8 validation are encoded as "flip" bits relative to the
// message-level defaults recorded when the message was started.  Returns the
// new write position, or NULL if the buffer filled up.
static char* _upb_MtDataEncoder_MaybePutModifiers(upb_MtDataEncoder* e,
                                                  char* ptr, upb_FieldType type,
                                                  uint64_t field_mod) {
  const uint64_t msg_mod =
      ((upb_MtDataEncoderInternal*)e->internal)->state.msg_state.msg_modifiers;
  uint32_t encoded = 0;

  // Packed: only meaningful for repeated fields of a packable type.
  if ((field_mod & kUpb_FieldModifier_IsRepeated) &&
      upb_FieldType_IsPackable(type)) {
    const bool field_is_packed =
        (field_mod & kUpb_FieldModifier_IsPacked) != 0;
    const bool default_is_packed =
        (msg_mod & kUpb_MessageModifier_DefaultIsPacked) != 0;
    if (field_is_packed != default_is_packed) {
      encoded |= kUpb_EncodedFieldModifier_FlipPacked;
    }
  }

  // UTF-8 validation: only meaningful for string fields.
  if (type == kUpb_FieldType_String) {
    const bool field_validates_utf8 =
        (field_mod & kUpb_FieldModifier_ValidateUtf8) != 0;
    const bool message_validates_utf8 =
        (msg_mod & kUpb_MessageModifier_ValidateUtf8) != 0;
    if (field_validates_utf8 != message_validates_utf8) {
      // Old binaries do not recognize the field modifier.  We need the failure
      // mode to be too lax rather than too strict.  Our caller should have
      // handled this (see _upb_MessageDef_ValidateUtf8()).
      assert(!message_validates_utf8);
      encoded |= kUpb_EncodedFieldModifier_FlipValidateUtf8;
    }
  }

  // The remaining modifiers map one-to-one onto encoded bits.
  if (field_mod & kUpb_FieldModifier_IsProto3Singular) {
    encoded |= kUpb_EncodedFieldModifier_IsProto3Singular;
  }
  if (field_mod & kUpb_FieldModifier_IsRequired) {
    encoded |= kUpb_EncodedFieldModifier_IsRequired;
  }

  return upb_MtDataEncoder_PutModifier(e, ptr, encoded);
}
235 
// Emits one field: an optional skip (when field numbers are not consecutive),
// the encoded type character, and any per-field modifiers.
//
// Returns the new write position, or NULL if `field_num` is not greater than
// the previously emitted field number or the buffer filled up.
char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr,
                                 upb_FieldType type, uint32_t field_num,
                                 uint64_t field_mod) {
  // Record the buffer start for this call; the returned state is not needed.
  upb_MtDataEncoder_GetInternal(e, ptr);

  ptr = _upb_MtDataEncoder_MaybePutFieldSkip(e, ptr, field_num);
  if (ptr) ptr = _upb_MtDataEncoder_PutFieldType(e, ptr, type, field_mod);
  if (ptr) ptr = _upb_MtDataEncoder_MaybePutModifiers(e, ptr, type, field_mod);
  return ptr;
}
249 
// Begins a new oneof group in the message mini-descriptor.
//
// The first oneof is introduced by the kUpb_EncodedValue_End marker (which
// terminates the field list); each subsequent oneof is introduced by
// kUpb_EncodedValue_OneofSeparator.
//
// Returns the new write position, or NULL if the buffer is full.  On failure
// the oneof state is left untouched, so a failed call does not change which
// marker a later call would emit.
char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* ptr) {
  upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
  if (in->state.msg_state.oneof_state == kUpb_OneofState_NotStarted) {
    ptr = upb_MtDataEncoder_Put(e, ptr, _upb_FromBase92(kUpb_EncodedValue_End));
  } else {
    ptr = upb_MtDataEncoder_Put(
        e, ptr, _upb_FromBase92(kUpb_EncodedValue_OneofSeparator));
  }
  // Only advance the state machine once the marker has actually been written.
  if (!ptr) return NULL;
  in->state.msg_state.oneof_state = kUpb_OneofState_StartedOneof;
  return ptr;
}
261 
// Appends `field_num` to the oneof group currently being emitted.
//
// Every field after the first in a group is preceded by
// kUpb_EncodedValue_FieldSeparator.  The field number itself is written as a
// base-92 varint over the character range for values 0..63.
//
// Returns the new write position, or NULL if the buffer filled up.  The oneof
// state only advances to "emitted a field" when the field number was written
// successfully.
char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr,
                                      uint32_t field_num) {
  upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
  if (in->state.msg_state.oneof_state == kUpb_OneofState_EmittedOneofField) {
    ptr = upb_MtDataEncoder_Put(
        e, ptr, _upb_FromBase92(kUpb_EncodedValue_FieldSeparator));
    if (!ptr) return NULL;
  }
  ptr = upb_MtDataEncoder_PutBase92Varint(e, ptr, field_num, _upb_ToBase92(0),
                                          _upb_ToBase92(63));
  // Mirror the separator path: do not record progress for a failed write.
  if (!ptr) return NULL;
  in->state.msg_state.oneof_state = kUpb_OneofState_EmittedOneofField;
  return ptr;
}
275 
// Begins an enum mini-descriptor: clears the enum state and emits the enum
// version tag.  Returns the new write position, or NULL if the buffer is
// full.
char* upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e, char* ptr) {
  upb_MtDataEncoderInternal_EnumState* st =
      &upb_MtDataEncoder_GetInternal(e, ptr)->state.enum_state;
  st->present_values_mask = 0;
  st->last_written_value = 0;

  char* next = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_EnumV1);
  return next;
}
283 
// Emits the pending value bitmask as one base-92 character, then clears the
// mask and advances the base value by 5 (the number of values one mask
// character covers).
//
// The state is updated whether or not the write succeeded.  Returns the new
// write position, or NULL if the buffer is full.
static char* upb_MtDataEncoder_FlushDenseEnumMask(upb_MtDataEncoder* e,
                                                  char* ptr) {
  upb_MtDataEncoderInternal_EnumState* st =
      &((upb_MtDataEncoderInternal*)e->internal)->state.enum_state;

  char* next = upb_MtDataEncoder_Put(e, ptr, st->present_values_mask);
  st->present_values_mask = 0;
  st->last_written_value += 5;
  return next;
}
292 
// Adds enum value `val` to the enum mini-descriptor being built.
//
// Values are accumulated as bits in a small mask relative to a base value
// (`last_written_value`); each mask character covers 5 consecutive values.
// When `val` lies 5 or more past the base, any pending mask is flushed first,
// and a remaining gap of 5 or more is emitted as a skip varint that moves the
// base forward.
//
// `val` must not be smaller than the current base (asserted).  Returns the
// new write position, or NULL if the buffer filled up; on a failed skip write
// the base and mask are left unmodified.
char* upb_MtDataEncoder_PutEnumValue(upb_MtDataEncoder* e, char* ptr,
                                     uint32_t val) {
  // TODO: optimize this encoding.
  upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
  UPB_ASSERT(val >= in->state.enum_state.last_written_value);
  uint32_t delta = val - in->state.enum_state.last_written_value;
  if (delta >= 5 && in->state.enum_state.present_values_mask) {
    ptr = upb_MtDataEncoder_FlushDenseEnumMask(e, ptr);
    if (!ptr) {
      return NULL;
    }
    // The flush advanced the base by 5.
    delta -= 5;
  }

  if (delta >= 5) {
    ptr = upb_MtDataEncoder_PutBase92Varint(
        e, ptr, delta, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip);
    // Do not advance the base or record the value if the skip was not written.
    if (!ptr) {
      return NULL;
    }
    in->state.enum_state.last_written_value += delta;
    delta = 0;
  }

  // Values must arrive in increasing order: no bit at or above `delta` may
  // already be set.
  UPB_ASSERT((in->state.enum_state.present_values_mask >> delta) == 0);
  in->state.enum_state.present_values_mask |= 1ULL << delta;
  return ptr;
}
318 
// Finishes an enum mini-descriptor by flushing any pending value mask.
// Returns `ptr` unchanged when nothing is pending; otherwise the new write
// position, or NULL if the buffer is full.
char* upb_MtDataEncoder_EndEnum(upb_MtDataEncoder* e, char* ptr) {
  upb_MtDataEncoderInternal* state = upb_MtDataEncoder_GetInternal(e, ptr);
  const uint64_t pending = state->state.enum_state.present_values_mask;
  return pending ? upb_MtDataEncoder_FlushDenseEnumMask(e, ptr) : ptr;
}
324