1 /*
2  * Copyright (c) 2009-2021, Google LLC
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *     * Redistributions of source code must retain the above copyright
8  *       notice, this list of conditions and the following disclaimer.
9  *     * Redistributions in binary form must reproduce the above copyright
10  *       notice, this list of conditions and the following disclaimer in the
11  *       documentation and/or other materials provided with the distribution.
12  *     * Neither the name of Google LLC nor the
13  *       names of its contributors may be used to endorse or promote products
14  *       derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
20  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include <inttypes.h>
29 
30 #include "upb/base/log2.h"
31 #include "upb/mem/arena.h"
32 #include "upb/mini_table/common.h"
33 #include "upb/mini_table/common_internal.h"
34 #include "upb/mini_table/encode_internal.h"
35 
36 // Must be last.
37 #include "upb/port/def.inc"
38 
// Encoder state used while an enum is being emitted (reset by
// upb_MtDataEncoder_StartEnum(), consumed by PutEnumValue()/EndEnum()).
typedef struct {
  // Pending, not-yet-written bitmask of enum values.  Bit `i` stands for the
  // value `last_written_value + i`.  Emitted as a single Base92 character by
  // upb_MtDataEncoder_FlushDenseEnumMask() and then cleared.
  uint64_t present_values_mask;
  // Value that bit 0 of `present_values_mask` refers to.  Advanced by 5 on
  // every mask flush and by the skip amount whenever a skip is written.
  uint32_t last_written_value;
} upb_MtDataEncoderInternal_EnumState;
43 
// Encoder state used while a message, map entry or extension is being
// emitted.
typedef struct {
  // Message-level modifiers: the `msg_mod` given to StartMessage(), or 0 for
  // EncodeExtension()/EncodeMap().  PutField() reads it to decide whether a
  // repeated field's packedness differs from the message default.
  uint64_t msg_modifiers;
  // Field number most recently accepted by PutField(); 0 before any field.
  // PutField() rejects numbers that are not strictly greater than this.
  uint32_t last_field_num;
  // Progress of the oneof section; selects which separator (if any) the next
  // StartOneof()/PutOneofField() call writes first.
  enum {
    kUpb_OneofState_NotStarted,         // No StartOneof() call yet.
    kUpb_OneofState_StartedOneof,       // In a oneof, no field emitted yet.
    kUpb_OneofState_EmittedOneofField,  // Current oneof already has a field.
  } oneof_state;
} upb_MtDataEncoderInternal_MsgState;
53 
// Private encoder state.  upb_MtDataEncoder_GetInternal() overlays this struct
// on the `internal` storage of a upb_MtDataEncoder.
typedef struct {
  char* buf_start;  // Only for checking kUpb_MtDataEncoder_MinSize.
  // Only one member is meaningful at a time: the enum functions use
  // `enum_state`, the message/field/oneof functions use `msg_state`.
  union {
    upb_MtDataEncoderInternal_EnumState enum_state;
    upb_MtDataEncoderInternal_MsgState msg_state;
  } state;
} upb_MtDataEncoderInternal;
61 
upb_MtDataEncoder_GetInternal(upb_MtDataEncoder * e,char * buf_start)62 static upb_MtDataEncoderInternal* upb_MtDataEncoder_GetInternal(
63     upb_MtDataEncoder* e, char* buf_start) {
64   UPB_ASSERT(sizeof(upb_MtDataEncoderInternal) <= sizeof(e->internal));
65   upb_MtDataEncoderInternal* ret = (upb_MtDataEncoderInternal*)e->internal;
66   ret->buf_start = buf_start;
67   return ret;
68 }
69 
// Writes the byte `ch` verbatim at `ptr`.
//
// Returns the position just past the written byte, or NULL (writing nothing)
// when `ptr` has reached `e->end`.
static char* upb_MtDataEncoder_PutRaw(upb_MtDataEncoder* e, char* ptr,
                                      char ch) {
  upb_MtDataEncoderInternal* state = (upb_MtDataEncoderInternal*)e->internal;
  // A write must stay within MinSize bytes of the recorded window start.
  UPB_ASSERT(ptr - state->buf_start < kUpb_MtDataEncoder_MinSize);
  if (ptr != e->end) {
    ptr[0] = ch;
    return ptr + 1;
  }
  return NULL;
}
78 
// Maps `ch` through _upb_ToBase92() and writes the resulting character.
// Same return convention as upb_MtDataEncoder_PutRaw().
static char* upb_MtDataEncoder_Put(upb_MtDataEncoder* e, char* ptr, char ch) {
  const char encoded = _upb_ToBase92(ch);
  return upb_MtDataEncoder_PutRaw(e, ptr, encoded);
}
82 
// Writes `val` as a little-endian sequence of Base92 characters drawn from
// the inclusive character range [min, max].  Each character carries
// Log2Ceiling(range size) bits of `val`, lowest bits first, and at least one
// character is always written (even for val == 0).
//
// Returns the new write position, or NULL if the buffer ran out.
static char* upb_MtDataEncoder_PutBase92Varint(upb_MtDataEncoder* e, char* ptr,
                                               uint32_t val, int min, int max) {
  const int bits_per_char =
      upb_Log2Ceiling(_upb_FromBase92(max) - _upb_FromBase92(min) + 1);
  UPB_ASSERT(bits_per_char <= 6);
  const uint32_t low_mask = (1 << bits_per_char) - 1;

  for (;;) {
    const uint32_t chunk = val & low_mask;
    ptr = upb_MtDataEncoder_Put(e, ptr, chunk + _upb_FromBase92(min));
    if (ptr == NULL) return NULL;
    val >>= bits_per_char;
    if (val == 0) return ptr;
  }
}
96 
// Writes the modifier bits `mod` as a Base92 varint in the modifier character
// range.  A zero `mod` writes nothing and returns `ptr` unchanged.
//
// Note: `mod` is narrowed to uint32_t when handed to the varint writer.
//
// Returns the new write position, or NULL if the buffer ran out.
char* upb_MtDataEncoder_PutModifier(upb_MtDataEncoder* e, char* ptr,
                                    uint64_t mod) {
  if (mod == 0) return ptr;
  return upb_MtDataEncoder_PutBase92Varint(e, ptr, mod,
                                           kUpb_EncodedValue_MinModifier,
                                           kUpb_EncodedValue_MaxModifier);
}
106 
// Encodes a complete extension: the extension version tag followed by a
// single field record for (`type`, `field_num`, `field_mod`).
//
// Message state is reset first (no message modifiers, no previous field, no
// oneof in progress).
//
// Returns the new write position, or NULL on failure.
char* upb_MtDataEncoder_EncodeExtension(upb_MtDataEncoder* e, char* ptr,
                                        upb_FieldType type, uint32_t field_num,
                                        uint64_t field_mod) {
  upb_MtDataEncoderInternal_MsgState* msg =
      &upb_MtDataEncoder_GetInternal(e, ptr)->state.msg_state;
  msg->msg_modifiers = 0;
  msg->last_field_num = 0;
  msg->oneof_state = kUpb_OneofState_NotStarted;

  char* out = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_ExtensionV1);
  if (out == NULL) return NULL;

  return upb_MtDataEncoder_PutField(e, out, type, field_num, field_mod);
}
120 
// Encodes a complete map entry: the map version tag, then the key as field
// number 1 and the value as field number 2.
//
// Message state is reset first (no message modifiers, no previous field, no
// oneof in progress).
//
// Returns the new write position, or NULL on failure.
char* upb_MtDataEncoder_EncodeMap(upb_MtDataEncoder* e, char* ptr,
                                  upb_FieldType key_type,
                                  upb_FieldType value_type, uint64_t key_mod,
                                  uint64_t value_mod) {
  upb_MtDataEncoderInternal_MsgState* msg =
      &upb_MtDataEncoder_GetInternal(e, ptr)->state.msg_state;
  msg->msg_modifiers = 0;
  msg->last_field_num = 0;
  msg->oneof_state = kUpb_OneofState_NotStarted;

  char* out = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MapV1);
  if (out == NULL) return NULL;

  out = upb_MtDataEncoder_PutField(e, out, key_type, 1, key_mod);
  if (out == NULL) return NULL;

  return upb_MtDataEncoder_PutField(e, out, value_type, 2, value_mod);
}
138 
// Encodes a MessageSet, which consists solely of its version tag.
//
// Returns the new write position, or NULL if the buffer ran out.
char* upb_MtDataEncoder_EncodeMessageSet(upb_MtDataEncoder* e, char* ptr) {
  // Called only for its side effect of recording `ptr` as the window start.
  (void)upb_MtDataEncoder_GetInternal(e, ptr);
  const char version = kUpb_EncodedVersion_MessageSetV1;
  return upb_MtDataEncoder_PutRaw(e, ptr, version);
}
143 
// Begins a message: resets message state (remembering `msg_mod` for later
// PutField() calls), then writes the message version tag followed by the
// message modifiers (nothing extra is written when `msg_mod` is 0).
//
// Returns the new write position, or NULL if the buffer ran out.
char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr,
                                     uint64_t msg_mod) {
  upb_MtDataEncoderInternal_MsgState* msg =
      &upb_MtDataEncoder_GetInternal(e, ptr)->state.msg_state;
  msg->msg_modifiers = msg_mod;
  msg->last_field_num = 0;
  msg->oneof_state = kUpb_OneofState_NotStarted;

  char* out = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MessageV1);
  if (out == NULL) return NULL;

  return upb_MtDataEncoder_PutModifier(e, out, msg_mod);
}
156 
// Appends one field record to the message being encoded.
//
// Output, in order:
//   1. a skip varint, if `field_num` is not exactly one more than the
//      previously emitted field number;
//   2. one character for the encoded field type (shifted into the repeated
//      range when kUpb_FieldModifier_IsRepeated is set);
//   3. the encoded field modifiers, if any are set.
//
// Fields must arrive in strictly increasing `field_num` order.
//
// Returns the new write position, or NULL if `field_num` is not greater than
// the previous field number or the buffer ran out.
char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr,
                                 upb_FieldType type, uint32_t field_num,
                                 uint64_t field_mod) {
  static const char kUpb_TypeToEncoded[] = {
      [kUpb_FieldType_Double] = kUpb_EncodedType_Double,
      [kUpb_FieldType_Float] = kUpb_EncodedType_Float,
      [kUpb_FieldType_Int64] = kUpb_EncodedType_Int64,
      [kUpb_FieldType_UInt64] = kUpb_EncodedType_UInt64,
      [kUpb_FieldType_Int32] = kUpb_EncodedType_Int32,
      [kUpb_FieldType_Fixed64] = kUpb_EncodedType_Fixed64,
      [kUpb_FieldType_Fixed32] = kUpb_EncodedType_Fixed32,
      [kUpb_FieldType_Bool] = kUpb_EncodedType_Bool,
      [kUpb_FieldType_String] = kUpb_EncodedType_String,
      [kUpb_FieldType_Group] = kUpb_EncodedType_Group,
      [kUpb_FieldType_Message] = kUpb_EncodedType_Message,
      [kUpb_FieldType_Bytes] = kUpb_EncodedType_Bytes,
      [kUpb_FieldType_UInt32] = kUpb_EncodedType_UInt32,
      [kUpb_FieldType_Enum] = kUpb_EncodedType_OpenEnum,
      [kUpb_FieldType_SFixed32] = kUpb_EncodedType_SFixed32,
      [kUpb_FieldType_SFixed64] = kUpb_EncodedType_SFixed64,
      [kUpb_FieldType_SInt32] = kUpb_EncodedType_SInt32,
      [kUpb_FieldType_SInt64] = kUpb_EncodedType_SInt64,
  };

  upb_MtDataEncoderInternal_MsgState* msg =
      &upb_MtDataEncoder_GetInternal(e, ptr)->state.msg_state;

  // Reject duplicate or out-of-order field numbers.
  const uint32_t prev_num = msg->last_field_num;
  if (field_num <= prev_num) return NULL;

  if (field_num != prev_num + 1) {
    // Gap in the numbering: emit the distance from the previous field.
    UPB_ASSERT(field_num > prev_num);
    const uint32_t gap = field_num - prev_num;
    ptr = upb_MtDataEncoder_PutBase92Varint(
        e, ptr, gap, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip);
    if (ptr == NULL) return NULL;
  }
  msg->last_field_num = field_num;

  uint32_t out_mods = 0;

  // Work out the type character.
  int out_type = kUpb_TypeToEncoded[type];
  if (field_mod & kUpb_FieldModifier_IsClosedEnum) {
    UPB_ASSERT(type == kUpb_FieldType_Enum);
    out_type = kUpb_EncodedType_ClosedEnum;
  }
  if (field_mod & kUpb_FieldModifier_IsRepeated) {
    // Unlike the other modifiers (bit flags), "repeated" is expressed by
    // shifting the type number up.
    out_type += kUpb_EncodedType_RepeatedBase;

    if (upb_FieldType_IsPackable(type)) {
      // Packedness is only recorded when it differs from the message default.
      const bool wants_packed = (field_mod & kUpb_FieldModifier_IsPacked) != 0;
      const bool packed_by_default =
          (msg->msg_modifiers & kUpb_MessageModifier_DefaultIsPacked) != 0;
      if (wants_packed != packed_by_default) {
        out_mods |= kUpb_EncodedFieldModifier_FlipPacked;
      }
    }
  }
  ptr = upb_MtDataEncoder_Put(e, ptr, out_type);
  if (ptr == NULL) return NULL;

  // Remaining modifiers map one-to-one onto encoded modifier bits.
  if (field_mod & kUpb_FieldModifier_IsProto3Singular) {
    out_mods |= kUpb_EncodedFieldModifier_IsProto3Singular;
  }
  if (field_mod & kUpb_FieldModifier_IsRequired) {
    out_mods |= kUpb_EncodedFieldModifier_IsRequired;
  }
  return upb_MtDataEncoder_PutModifier(e, ptr, out_mods);
}
226 
// Begins a new oneof.  The first oneof of a message is introduced by the
// "end" marker; every later one by the oneof separator.
//
// The oneof state becomes StartedOneof regardless of whether the write
// succeeded.
//
// Returns the new write position, or NULL if the buffer ran out.
char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* ptr) {
  upb_MtDataEncoderInternal_MsgState* msg =
      &upb_MtDataEncoder_GetInternal(e, ptr)->state.msg_state;

  char* out;
  switch (msg->oneof_state) {
    case kUpb_OneofState_NotStarted:
      out =
          upb_MtDataEncoder_Put(e, ptr, _upb_FromBase92(kUpb_EncodedValue_End));
      break;
    default:
      out = upb_MtDataEncoder_Put(
          e, ptr, _upb_FromBase92(kUpb_EncodedValue_OneofSeparator));
      break;
  }

  msg->oneof_state = kUpb_OneofState_StartedOneof;
  return out;
}
238 
// Adds `field_num` to the oneof currently being emitted.  A field separator
// is written first unless this is the oneof's first field; the number itself
// is written as a Base92 varint over the 0..63 digit range.
//
// Once the separator (if any) has been written, the oneof state becomes
// EmittedOneofField even if writing the number fails.
//
// Returns the new write position, or NULL if the buffer ran out.
char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr,
                                      uint32_t field_num) {
  upb_MtDataEncoderInternal_MsgState* msg =
      &upb_MtDataEncoder_GetInternal(e, ptr)->state.msg_state;

  const bool needs_separator =
      msg->oneof_state == kUpb_OneofState_EmittedOneofField;
  if (needs_separator) {
    ptr = upb_MtDataEncoder_Put(
        e, ptr, _upb_FromBase92(kUpb_EncodedValue_FieldSeparator));
    if (ptr == NULL) return NULL;
  }

  char* out = upb_MtDataEncoder_PutBase92Varint(
      e, ptr, field_num, _upb_ToBase92(0), _upb_ToBase92(63));
  msg->oneof_state = kUpb_OneofState_EmittedOneofField;
  return out;
}
252 
// Begins an enum: clears the pending value mask, resets the base value to 0
// and writes the enum version tag.
//
// Returns the new write position, or NULL if the buffer ran out.
char* upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e, char* ptr) {
  upb_MtDataEncoderInternal_EnumState* en =
      &upb_MtDataEncoder_GetInternal(e, ptr)->state.enum_state;
  en->last_written_value = 0;
  en->present_values_mask = 0;

  return upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_EnumV1);
}
260 
// Writes the pending value mask as one Base92 character, then clears the mask
// and moves the base value forward by the 5 values that character covers.
// The state is updated even when the write fails.
//
// Returns the new write position, or NULL if the buffer ran out.
static char* upb_MtDataEncoder_FlushDenseEnumMask(upb_MtDataEncoder* e,
                                                  char* ptr) {
  upb_MtDataEncoderInternal_EnumState* en =
      &((upb_MtDataEncoderInternal*)e->internal)->state.enum_state;
  char* out = upb_MtDataEncoder_Put(e, ptr, en->present_values_mask);
  en->last_written_value += 5;
  en->present_values_mask = 0;
  return out;
}
269 
// Appends the enum value `val` to the enum being encoded.
//
// Values are tracked relative to a running base (`last_written_value`).  A
// value within 5 of the base is recorded as a bit in the pending mask.  A
// value further away first flushes any pending mask (advancing the base by
// 5), and, if it is still 5 or more away, writes the remaining distance as a
// skip varint and moves the base onto `val`.
//
// `val` must not be smaller than the current base (asserted), i.e. values
// are expected in non-decreasing order.
//
// Returns the new write position, or NULL if the buffer ran out.  Once a
// write has failed, no further encoder state is modified.
char* upb_MtDataEncoder_PutEnumValue(upb_MtDataEncoder* e, char* ptr,
                                     uint32_t val) {
  // TODO(b/229641772): optimize this encoding.
  upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
  UPB_ASSERT(val >= in->state.enum_state.last_written_value);
  uint32_t delta = val - in->state.enum_state.last_written_value;
  if (delta >= 5 && in->state.enum_state.present_values_mask) {
    // The value does not fit in the pending mask: write the mask out first.
    ptr = upb_MtDataEncoder_FlushDenseEnumMask(e, ptr);
    if (!ptr) {
      return NULL;
    }
    delta -= 5;  // The flush advanced the base by 5.
  }

  if (delta >= 5) {
    // Still out of mask range: skip straight to `val`.
    ptr = upb_MtDataEncoder_PutBase92Varint(
        e, ptr, delta, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip);
    // Stop on buffer exhaustion, as for the flush above, instead of
    // continuing to advance the base and set mask bits for unwritten output.
    if (!ptr) {
      return NULL;
    }
    in->state.enum_state.last_written_value += delta;
    delta = 0;
  }

  // No bit at or above `delta` may be set yet (values arrive in order).
  UPB_ASSERT((in->state.enum_state.present_values_mask >> delta) == 0);
  in->state.enum_state.present_values_mask |= 1ULL << delta;
  return ptr;
}
295 
// Finishes an enum by writing out any still-pending value mask.  When no
// values are pending, nothing is written and `ptr` is returned unchanged.
//
// Returns the new write position, or NULL if the buffer ran out.
char* upb_MtDataEncoder_EndEnum(upb_MtDataEncoder* e, char* ptr) {
  upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
  const uint64_t pending = in->state.enum_state.present_values_mask;
  if (pending != 0) {
    return upb_MtDataEncoder_FlushDenseEnumMask(e, ptr);
  }
  return ptr;
}
301