1 /*
2  * Copyright (c) 2009-2021, Google LLC
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *     * Redistributions of source code must retain the above copyright
8  *       notice, this list of conditions and the following disclaimer.
9  *     * Redistributions in binary form must reproduce the above copyright
10  *       notice, this list of conditions and the following disclaimer in the
11  *       documentation and/or other materials provided with the distribution.
12  *     * Neither the name of Google LLC nor the
13  *       names of its contributors may be used to endorse or promote products
14  *       derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
20  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * upb_table Implementation
30  *
31  * Implementation is heavily inspired by Lua's ltable.c.
32  */
33 
34 #include <string.h>
35 
36 #include "upb/base/log2.h"
37 #include "upb/hash/int_table.h"
38 #include "upb/hash/str_table.h"
39 
40 // Must be last.
41 #include "upb/port/def.inc"
42 
43 #define UPB_MAXARRSIZE 16  // 2**16 = 64k.
44 
45 // From Chromium.
46 #define ARRAY_SIZE(x) \
47   ((sizeof(x) / sizeof(0 [x])) / ((size_t)(!(sizeof(x) % sizeof(0 [x])))))
48 
49 static const double MAX_LOAD = 0.85;
50 
51 /* The minimum utilization of the array part of a mixed hash/array table.  This
52  * is a speed/memory-usage tradeoff (though it's not straightforward because of
53  * cache effects).  The lower this is, the more memory we'll use. */
54 static const double MIN_DENSITY = 0.1;
55 
is_pow2(uint64_t v)56 static bool is_pow2(uint64_t v) { return v == 0 || (v & (v - 1)) == 0; }
57 
_upb_value_val(uint64_t val)58 static upb_value _upb_value_val(uint64_t val) {
59   upb_value ret;
60   _upb_value_setval(&ret, val);
61   return ret;
62 }
63 
log2ceil(uint64_t v)64 static int log2ceil(uint64_t v) {
65   int ret = 0;
66   bool pow2 = is_pow2(v);
67   while (v >>= 1) ret++;
68   ret = pow2 ? ret : ret + 1;  // Ceiling.
69   return UPB_MIN(UPB_MAXARRSIZE, ret);
70 }
71 
upb_strdup2(const char * s,size_t len,upb_Arena * a)72 char* upb_strdup2(const char* s, size_t len, upb_Arena* a) {
73   size_t n;
74   char* p;
75 
76   /* Prevent overflow errors. */
77   if (len == SIZE_MAX) return NULL;
78   /* Always null-terminate, even if binary data; but don't rely on the input to
79    * have a null-terminating byte since it may be a raw binary buffer. */
80   n = len + 1;
81   p = upb_Arena_Malloc(a, n);
82   if (p) {
83     if (len != 0) memcpy(p, s, len);
84     p[len] = 0;
85   }
86   return p;
87 }
88 
89 /* A type to represent the lookup key of either a strtable or an inttable. */
90 typedef union {
91   uintptr_t num;
92   struct {
93     const char* str;
94     size_t len;
95   } str;
96 } lookupkey_t;
97 
strkey2(const char * str,size_t len)98 static lookupkey_t strkey2(const char* str, size_t len) {
99   lookupkey_t k;
100   k.str.str = str;
101   k.str.len = len;
102   return k;
103 }
104 
intkey(uintptr_t key)105 static lookupkey_t intkey(uintptr_t key) {
106   lookupkey_t k;
107   k.num = key;
108   return k;
109 }
110 
111 typedef uint32_t hashfunc_t(upb_tabkey key);
112 typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2);
113 
114 /* Base table (shared code) ***************************************************/
115 
upb_inthash(uintptr_t key)116 static uint32_t upb_inthash(uintptr_t key) { return (uint32_t)key; }
117 
upb_getentry(const upb_table * t,uint32_t hash)118 static const upb_tabent* upb_getentry(const upb_table* t, uint32_t hash) {
119   return t->entries + (hash & t->mask);
120 }
121 
upb_arrhas(upb_tabval key)122 static bool upb_arrhas(upb_tabval key) { return key.val != (uint64_t)-1; }
123 
isfull(upb_table * t)124 static bool isfull(upb_table* t) { return t->count == t->max_count; }
125 
init(upb_table * t,uint8_t size_lg2,upb_Arena * a)126 static bool init(upb_table* t, uint8_t size_lg2, upb_Arena* a) {
127   size_t bytes;
128 
129   t->count = 0;
130   t->size_lg2 = size_lg2;
131   t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0;
132   t->max_count = upb_table_size(t) * MAX_LOAD;
133   bytes = upb_table_size(t) * sizeof(upb_tabent);
134   if (bytes > 0) {
135     t->entries = upb_Arena_Malloc(a, bytes);
136     if (!t->entries) return false;
137     memset(t->entries, 0, bytes);
138   } else {
139     t->entries = NULL;
140   }
141   return true;
142 }
143 
emptyent(upb_table * t,upb_tabent * e)144 static upb_tabent* emptyent(upb_table* t, upb_tabent* e) {
145   upb_tabent* begin = t->entries;
146   upb_tabent* end = begin + upb_table_size(t);
147   for (e = e + 1; e < end; e++) {
148     if (upb_tabent_isempty(e)) return e;
149   }
150   for (e = begin; e < end; e++) {
151     if (upb_tabent_isempty(e)) return e;
152   }
153   UPB_ASSERT(false);
154   return NULL;
155 }
156 
getentry_mutable(upb_table * t,uint32_t hash)157 static upb_tabent* getentry_mutable(upb_table* t, uint32_t hash) {
158   return (upb_tabent*)upb_getentry(t, hash);
159 }
160 
findentry(const upb_table * t,lookupkey_t key,uint32_t hash,eqlfunc_t * eql)161 static const upb_tabent* findentry(const upb_table* t, lookupkey_t key,
162                                    uint32_t hash, eqlfunc_t* eql) {
163   const upb_tabent* e;
164 
165   if (t->size_lg2 == 0) return NULL;
166   e = upb_getentry(t, hash);
167   if (upb_tabent_isempty(e)) return NULL;
168   while (1) {
169     if (eql(e->key, key)) return e;
170     if ((e = e->next) == NULL) return NULL;
171   }
172 }
173 
findentry_mutable(upb_table * t,lookupkey_t key,uint32_t hash,eqlfunc_t * eql)174 static upb_tabent* findentry_mutable(upb_table* t, lookupkey_t key,
175                                      uint32_t hash, eqlfunc_t* eql) {
176   return (upb_tabent*)findentry(t, key, hash, eql);
177 }
178 
lookup(const upb_table * t,lookupkey_t key,upb_value * v,uint32_t hash,eqlfunc_t * eql)179 static bool lookup(const upb_table* t, lookupkey_t key, upb_value* v,
180                    uint32_t hash, eqlfunc_t* eql) {
181   const upb_tabent* e = findentry(t, key, hash, eql);
182   if (e) {
183     if (v) {
184       _upb_value_setval(v, e->val.val);
185     }
186     return true;
187   } else {
188     return false;
189   }
190 }
191 
192 /* The given key must not already exist in the table. */
insert(upb_table * t,lookupkey_t key,upb_tabkey tabkey,upb_value val,uint32_t hash,hashfunc_t * hashfunc,eqlfunc_t * eql)193 static void insert(upb_table* t, lookupkey_t key, upb_tabkey tabkey,
194                    upb_value val, uint32_t hash, hashfunc_t* hashfunc,
195                    eqlfunc_t* eql) {
196   upb_tabent* mainpos_e;
197   upb_tabent* our_e;
198 
199   UPB_ASSERT(findentry(t, key, hash, eql) == NULL);
200 
201   t->count++;
202   mainpos_e = getentry_mutable(t, hash);
203   our_e = mainpos_e;
204 
205   if (upb_tabent_isempty(mainpos_e)) {
206     /* Our main position is empty; use it. */
207     our_e->next = NULL;
208   } else {
209     /* Collision. */
210     upb_tabent* new_e = emptyent(t, mainpos_e);
211     /* Head of collider's chain. */
212     upb_tabent* chain = getentry_mutable(t, hashfunc(mainpos_e->key));
213     if (chain == mainpos_e) {
214       /* Existing ent is in its main position (it has the same hash as us, and
215        * is the head of our chain).  Insert to new ent and append to this chain.
216        */
217       new_e->next = mainpos_e->next;
218       mainpos_e->next = new_e;
219       our_e = new_e;
220     } else {
221       /* Existing ent is not in its main position (it is a node in some other
222        * chain).  This implies that no existing ent in the table has our hash.
223        * Evict it (updating its chain) and use its ent for head of our chain. */
224       *new_e = *mainpos_e; /* copies next. */
225       while (chain->next != mainpos_e) {
226         chain = (upb_tabent*)chain->next;
227         UPB_ASSERT(chain);
228       }
229       chain->next = new_e;
230       our_e = mainpos_e;
231       our_e->next = NULL;
232     }
233   }
234   our_e->key = tabkey;
235   our_e->val.val = val.val;
236   UPB_ASSERT(findentry(t, key, hash, eql) == our_e);
237 }
238 
rm(upb_table * t,lookupkey_t key,upb_value * val,upb_tabkey * removed,uint32_t hash,eqlfunc_t * eql)239 static bool rm(upb_table* t, lookupkey_t key, upb_value* val,
240                upb_tabkey* removed, uint32_t hash, eqlfunc_t* eql) {
241   upb_tabent* chain = getentry_mutable(t, hash);
242   if (upb_tabent_isempty(chain)) return false;
243   if (eql(chain->key, key)) {
244     /* Element to remove is at the head of its chain. */
245     t->count--;
246     if (val) _upb_value_setval(val, chain->val.val);
247     if (removed) *removed = chain->key;
248     if (chain->next) {
249       upb_tabent* move = (upb_tabent*)chain->next;
250       *chain = *move;
251       move->key = 0; /* Make the slot empty. */
252     } else {
253       chain->key = 0; /* Make the slot empty. */
254     }
255     return true;
256   } else {
257     /* Element to remove is either in a non-head position or not in the
258      * table. */
259     while (chain->next && !eql(chain->next->key, key)) {
260       chain = (upb_tabent*)chain->next;
261     }
262     if (chain->next) {
263       /* Found element to remove. */
264       upb_tabent* rm = (upb_tabent*)chain->next;
265       t->count--;
266       if (val) _upb_value_setval(val, chain->next->val.val);
267       if (removed) *removed = rm->key;
268       rm->key = 0; /* Make the slot empty. */
269       chain->next = rm->next;
270       return true;
271     } else {
272       /* Element to remove is not in the table. */
273       return false;
274     }
275   }
276 }
277 
next(const upb_table * t,size_t i)278 static size_t next(const upb_table* t, size_t i) {
279   do {
280     if (++i >= upb_table_size(t)) return SIZE_MAX - 1; /* Distinct from -1. */
281   } while (upb_tabent_isempty(&t->entries[i]));
282 
283   return i;
284 }
285 
begin(const upb_table * t)286 static size_t begin(const upb_table* t) { return next(t, -1); }
287 
288 /* upb_strtable ***************************************************************/
289 
290 /* A simple "subclass" of upb_table that only adds a hash function for strings.
291  */
292 
strcopy(lookupkey_t k2,upb_Arena * a)293 static upb_tabkey strcopy(lookupkey_t k2, upb_Arena* a) {
294   uint32_t len = (uint32_t)k2.str.len;
295   char* str = upb_Arena_Malloc(a, k2.str.len + sizeof(uint32_t) + 1);
296   if (str == NULL) return 0;
297   memcpy(str, &len, sizeof(uint32_t));
298   if (k2.str.len) memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len);
299   str[sizeof(uint32_t) + k2.str.len] = '\0';
300   return (uintptr_t)str;
301 }
302 
303 /* Adapted from ABSL's wyhash. */
304 
UnalignedLoad64(const void * p)305 static uint64_t UnalignedLoad64(const void* p) {
306   uint64_t val;
307   memcpy(&val, p, 8);
308   return val;
309 }
310 
UnalignedLoad32(const void * p)311 static uint32_t UnalignedLoad32(const void* p) {
312   uint32_t val;
313   memcpy(&val, p, 4);
314   return val;
315 }
316 
317 #if defined(_MSC_VER) && defined(_M_X64)
318 #include <intrin.h>
319 #endif
320 
321 /* Computes a * b, returning the low 64 bits of the result and storing the high
322  * 64 bits in |*high|. */
upb_umul128(uint64_t v0,uint64_t v1,uint64_t * out_high)323 static uint64_t upb_umul128(uint64_t v0, uint64_t v1, uint64_t* out_high) {
324 #ifdef __SIZEOF_INT128__
325   __uint128_t p = v0;
326   p *= v1;
327   *out_high = (uint64_t)(p >> 64);
328   return (uint64_t)p;
329 #elif defined(_MSC_VER) && defined(_M_X64)
330   return _umul128(v0, v1, out_high);
331 #else
332   uint64_t a32 = v0 >> 32;
333   uint64_t a00 = v0 & 0xffffffff;
334   uint64_t b32 = v1 >> 32;
335   uint64_t b00 = v1 & 0xffffffff;
336   uint64_t high = a32 * b32;
337   uint64_t low = a00 * b00;
338   uint64_t mid1 = a32 * b00;
339   uint64_t mid2 = a00 * b32;
340   low += (mid1 << 32) + (mid2 << 32);
341   // Omit carry bit, for mixing we do not care about exact numerical precision.
342   high += (mid1 >> 32) + (mid2 >> 32);
343   *out_high = high;
344   return low;
345 #endif
346 }
347 
WyhashMix(uint64_t v0,uint64_t v1)348 static uint64_t WyhashMix(uint64_t v0, uint64_t v1) {
349   uint64_t high;
350   uint64_t low = upb_umul128(v0, v1, &high);
351   return low ^ high;
352 }
353 
Wyhash(const void * data,size_t len,uint64_t seed,const uint64_t salt[])354 static uint64_t Wyhash(const void* data, size_t len, uint64_t seed,
355                        const uint64_t salt[]) {
356   const uint8_t* ptr = (const uint8_t*)data;
357   uint64_t starting_length = (uint64_t)len;
358   uint64_t current_state = seed ^ salt[0];
359 
360   if (len > 64) {
361     // If we have more than 64 bytes, we're going to handle chunks of 64
362     // bytes at a time. We're going to build up two separate hash states
363     // which we will then hash together.
364     uint64_t duplicated_state = current_state;
365 
366     do {
367       uint64_t a = UnalignedLoad64(ptr);
368       uint64_t b = UnalignedLoad64(ptr + 8);
369       uint64_t c = UnalignedLoad64(ptr + 16);
370       uint64_t d = UnalignedLoad64(ptr + 24);
371       uint64_t e = UnalignedLoad64(ptr + 32);
372       uint64_t f = UnalignedLoad64(ptr + 40);
373       uint64_t g = UnalignedLoad64(ptr + 48);
374       uint64_t h = UnalignedLoad64(ptr + 56);
375 
376       uint64_t cs0 = WyhashMix(a ^ salt[1], b ^ current_state);
377       uint64_t cs1 = WyhashMix(c ^ salt[2], d ^ current_state);
378       current_state = (cs0 ^ cs1);
379 
380       uint64_t ds0 = WyhashMix(e ^ salt[3], f ^ duplicated_state);
381       uint64_t ds1 = WyhashMix(g ^ salt[4], h ^ duplicated_state);
382       duplicated_state = (ds0 ^ ds1);
383 
384       ptr += 64;
385       len -= 64;
386     } while (len > 64);
387 
388     current_state = current_state ^ duplicated_state;
389   }
390 
391   // We now have a data `ptr` with at most 64 bytes and the current state
392   // of the hashing state machine stored in current_state.
393   while (len > 16) {
394     uint64_t a = UnalignedLoad64(ptr);
395     uint64_t b = UnalignedLoad64(ptr + 8);
396 
397     current_state = WyhashMix(a ^ salt[1], b ^ current_state);
398 
399     ptr += 16;
400     len -= 16;
401   }
402 
403   // We now have a data `ptr` with at most 16 bytes.
404   uint64_t a = 0;
405   uint64_t b = 0;
406   if (len > 8) {
407     // When we have at least 9 and at most 16 bytes, set A to the first 64
408     // bits of the input and B to the last 64 bits of the input. Yes, they will
409     // overlap in the middle if we are working with less than the full 16
410     // bytes.
411     a = UnalignedLoad64(ptr);
412     b = UnalignedLoad64(ptr + len - 8);
413   } else if (len > 3) {
414     // If we have at least 4 and at most 8 bytes, set A to the first 32
415     // bits and B to the last 32 bits.
416     a = UnalignedLoad32(ptr);
417     b = UnalignedLoad32(ptr + len - 4);
418   } else if (len > 0) {
419     // If we have at least 1 and at most 3 bytes, read all of the provided
420     // bits into A, with some adjustments.
421     a = ((ptr[0] << 16) | (ptr[len >> 1] << 8) | ptr[len - 1]);
422     b = 0;
423   } else {
424     a = 0;
425     b = 0;
426   }
427 
428   uint64_t w = WyhashMix(a ^ salt[1], b ^ current_state);
429   uint64_t z = salt[1] ^ starting_length;
430   return WyhashMix(w, z);
431 }
432 
433 const uint64_t kWyhashSalt[5] = {
434     0x243F6A8885A308D3ULL, 0x13198A2E03707344ULL, 0xA4093822299F31D0ULL,
435     0x082EFA98EC4E6C89ULL, 0x452821E638D01377ULL,
436 };
437 
_upb_Hash(const void * p,size_t n,uint64_t seed)438 uint32_t _upb_Hash(const void* p, size_t n, uint64_t seed) {
439   return Wyhash(p, n, seed, kWyhashSalt);
440 }
441 
_upb_Hash_NoSeed(const char * p,size_t n)442 static uint32_t _upb_Hash_NoSeed(const char* p, size_t n) {
443   return _upb_Hash(p, n, 0);
444 }
445 
strhash(upb_tabkey key)446 static uint32_t strhash(upb_tabkey key) {
447   uint32_t len;
448   char* str = upb_tabstr(key, &len);
449   return _upb_Hash_NoSeed(str, len);
450 }
451 
streql(upb_tabkey k1,lookupkey_t k2)452 static bool streql(upb_tabkey k1, lookupkey_t k2) {
453   uint32_t len;
454   char* str = upb_tabstr(k1, &len);
455   return len == k2.str.len && (len == 0 || memcmp(str, k2.str.str, len) == 0);
456 }
457 
upb_strtable_init(upb_strtable * t,size_t expected_size,upb_Arena * a)458 bool upb_strtable_init(upb_strtable* t, size_t expected_size, upb_Arena* a) {
459   // Multiply by approximate reciprocal of MAX_LOAD (0.85), with pow2
460   // denominator.
461   size_t need_entries = (expected_size + 1) * 1204 / 1024;
462   UPB_ASSERT(need_entries >= expected_size * 0.85);
463   int size_lg2 = upb_Log2Ceiling(need_entries);
464   return init(&t->t, size_lg2, a);
465 }
466 
upb_strtable_clear(upb_strtable * t)467 void upb_strtable_clear(upb_strtable* t) {
468   size_t bytes = upb_table_size(&t->t) * sizeof(upb_tabent);
469   t->t.count = 0;
470   memset((char*)t->t.entries, 0, bytes);
471 }
472 
upb_strtable_resize(upb_strtable * t,size_t size_lg2,upb_Arena * a)473 bool upb_strtable_resize(upb_strtable* t, size_t size_lg2, upb_Arena* a) {
474   upb_strtable new_table;
475   if (!init(&new_table.t, size_lg2, a)) return false;
476 
477   intptr_t iter = UPB_STRTABLE_BEGIN;
478   upb_StringView key;
479   upb_value val;
480   while (upb_strtable_next2(t, &key, &val, &iter)) {
481     upb_strtable_insert(&new_table, key.data, key.size, val, a);
482   }
483   *t = new_table;
484   return true;
485 }
486 
upb_strtable_insert(upb_strtable * t,const char * k,size_t len,upb_value v,upb_Arena * a)487 bool upb_strtable_insert(upb_strtable* t, const char* k, size_t len,
488                          upb_value v, upb_Arena* a) {
489   lookupkey_t key;
490   upb_tabkey tabkey;
491   uint32_t hash;
492 
493   if (isfull(&t->t)) {
494     /* Need to resize.  New table of double the size, add old elements to it. */
495     if (!upb_strtable_resize(t, t->t.size_lg2 + 1, a)) {
496       return false;
497     }
498   }
499 
500   key = strkey2(k, len);
501   tabkey = strcopy(key, a);
502   if (tabkey == 0) return false;
503 
504   hash = _upb_Hash_NoSeed(key.str.str, key.str.len);
505   insert(&t->t, key, tabkey, v, hash, &strhash, &streql);
506   return true;
507 }
508 
upb_strtable_lookup2(const upb_strtable * t,const char * key,size_t len,upb_value * v)509 bool upb_strtable_lookup2(const upb_strtable* t, const char* key, size_t len,
510                           upb_value* v) {
511   uint32_t hash = _upb_Hash_NoSeed(key, len);
512   return lookup(&t->t, strkey2(key, len), v, hash, &streql);
513 }
514 
upb_strtable_remove2(upb_strtable * t,const char * key,size_t len,upb_value * val)515 bool upb_strtable_remove2(upb_strtable* t, const char* key, size_t len,
516                           upb_value* val) {
517   uint32_t hash = _upb_Hash_NoSeed(key, len);
518   upb_tabkey tabkey;
519   return rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql);
520 }
521 
522 /* Iteration */
523 
upb_strtable_begin(upb_strtable_iter * i,const upb_strtable * t)524 void upb_strtable_begin(upb_strtable_iter* i, const upb_strtable* t) {
525   i->t = t;
526   i->index = begin(&t->t);
527 }
528 
upb_strtable_next(upb_strtable_iter * i)529 void upb_strtable_next(upb_strtable_iter* i) {
530   i->index = next(&i->t->t, i->index);
531 }
532 
upb_strtable_done(const upb_strtable_iter * i)533 bool upb_strtable_done(const upb_strtable_iter* i) {
534   if (!i->t) return true;
535   return i->index >= upb_table_size(&i->t->t) ||
536          upb_tabent_isempty(str_tabent(i));
537 }
538 
upb_strtable_iter_key(const upb_strtable_iter * i)539 upb_StringView upb_strtable_iter_key(const upb_strtable_iter* i) {
540   upb_StringView key;
541   uint32_t len;
542   UPB_ASSERT(!upb_strtable_done(i));
543   key.data = upb_tabstr(str_tabent(i)->key, &len);
544   key.size = len;
545   return key;
546 }
547 
upb_strtable_iter_value(const upb_strtable_iter * i)548 upb_value upb_strtable_iter_value(const upb_strtable_iter* i) {
549   UPB_ASSERT(!upb_strtable_done(i));
550   return _upb_value_val(str_tabent(i)->val.val);
551 }
552 
upb_strtable_iter_setdone(upb_strtable_iter * i)553 void upb_strtable_iter_setdone(upb_strtable_iter* i) {
554   i->t = NULL;
555   i->index = SIZE_MAX;
556 }
557 
upb_strtable_iter_isequal(const upb_strtable_iter * i1,const upb_strtable_iter * i2)558 bool upb_strtable_iter_isequal(const upb_strtable_iter* i1,
559                                const upb_strtable_iter* i2) {
560   if (upb_strtable_done(i1) && upb_strtable_done(i2)) return true;
561   return i1->t == i2->t && i1->index == i2->index;
562 }
563 
564 /* upb_inttable ***************************************************************/
565 
566 /* For inttables we use a hybrid structure where small keys are kept in an
567  * array and large keys are put in the hash table. */
568 
inthash(upb_tabkey key)569 static uint32_t inthash(upb_tabkey key) { return upb_inthash(key); }
570 
inteql(upb_tabkey k1,lookupkey_t k2)571 static bool inteql(upb_tabkey k1, lookupkey_t k2) { return k1 == k2.num; }
572 
mutable_array(upb_inttable * t)573 static upb_tabval* mutable_array(upb_inttable* t) {
574   return (upb_tabval*)t->array;
575 }
576 
inttable_val(upb_inttable * t,uintptr_t key)577 static upb_tabval* inttable_val(upb_inttable* t, uintptr_t key) {
578   if (key < t->array_size) {
579     return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL;
580   } else {
581     upb_tabent* e =
582         findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql);
583     return e ? &e->val : NULL;
584   }
585 }
586 
inttable_val_const(const upb_inttable * t,uintptr_t key)587 static const upb_tabval* inttable_val_const(const upb_inttable* t,
588                                             uintptr_t key) {
589   return inttable_val((upb_inttable*)t, key);
590 }
591 
upb_inttable_count(const upb_inttable * t)592 size_t upb_inttable_count(const upb_inttable* t) {
593   return t->t.count + t->array_count;
594 }
595 
check(upb_inttable * t)596 static void check(upb_inttable* t) {
597   UPB_UNUSED(t);
598 #if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG)
599   {
600     // This check is very expensive (makes inserts/deletes O(N)).
601     size_t count = 0;
602     intptr_t iter = UPB_INTTABLE_BEGIN;
603     uintptr_t key;
604     upb_value val;
605     while (upb_inttable_next(t, &key, &val, &iter)) {
606       UPB_ASSERT(upb_inttable_lookup(t, key, NULL));
607     }
608     UPB_ASSERT(count == upb_inttable_count(t));
609   }
610 #endif
611 }
612 
upb_inttable_sizedinit(upb_inttable * t,size_t asize,int hsize_lg2,upb_Arena * a)613 bool upb_inttable_sizedinit(upb_inttable* t, size_t asize, int hsize_lg2,
614                             upb_Arena* a) {
615   size_t array_bytes;
616 
617   if (!init(&t->t, hsize_lg2, a)) return false;
618   /* Always make the array part at least 1 long, so that we know key 0
619    * won't be in the hash part, which simplifies things. */
620   t->array_size = UPB_MAX(1, asize);
621   t->array_count = 0;
622   array_bytes = t->array_size * sizeof(upb_value);
623   t->array = upb_Arena_Malloc(a, array_bytes);
624   if (!t->array) {
625     return false;
626   }
627   memset(mutable_array(t), 0xff, array_bytes);
628   check(t);
629   return true;
630 }
631 
upb_inttable_init(upb_inttable * t,upb_Arena * a)632 bool upb_inttable_init(upb_inttable* t, upb_Arena* a) {
633   return upb_inttable_sizedinit(t, 0, 4, a);
634 }
635 
upb_inttable_insert(upb_inttable * t,uintptr_t key,upb_value val,upb_Arena * a)636 bool upb_inttable_insert(upb_inttable* t, uintptr_t key, upb_value val,
637                          upb_Arena* a) {
638   upb_tabval tabval;
639   tabval.val = val.val;
640   UPB_ASSERT(
641       upb_arrhas(tabval)); /* This will reject (uint64_t)-1.  Fix this. */
642 
643   if (key < t->array_size) {
644     UPB_ASSERT(!upb_arrhas(t->array[key]));
645     t->array_count++;
646     mutable_array(t)[key].val = val.val;
647   } else {
648     if (isfull(&t->t)) {
649       /* Need to resize the hash part, but we re-use the array part. */
650       size_t i;
651       upb_table new_table;
652 
653       if (!init(&new_table, t->t.size_lg2 + 1, a)) {
654         return false;
655       }
656 
657       for (i = begin(&t->t); i < upb_table_size(&t->t); i = next(&t->t, i)) {
658         const upb_tabent* e = &t->t.entries[i];
659         uint32_t hash;
660         upb_value v;
661 
662         _upb_value_setval(&v, e->val.val);
663         hash = upb_inthash(e->key);
664         insert(&new_table, intkey(e->key), e->key, v, hash, &inthash, &inteql);
665       }
666 
667       UPB_ASSERT(t->t.count == new_table.count);
668 
669       t->t = new_table;
670     }
671     insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql);
672   }
673   check(t);
674   return true;
675 }
676 
upb_inttable_lookup(const upb_inttable * t,uintptr_t key,upb_value * v)677 bool upb_inttable_lookup(const upb_inttable* t, uintptr_t key, upb_value* v) {
678   const upb_tabval* table_v = inttable_val_const(t, key);
679   if (!table_v) return false;
680   if (v) _upb_value_setval(v, table_v->val);
681   return true;
682 }
683 
upb_inttable_replace(upb_inttable * t,uintptr_t key,upb_value val)684 bool upb_inttable_replace(upb_inttable* t, uintptr_t key, upb_value val) {
685   upb_tabval* table_v = inttable_val(t, key);
686   if (!table_v) return false;
687   table_v->val = val.val;
688   return true;
689 }
690 
upb_inttable_remove(upb_inttable * t,uintptr_t key,upb_value * val)691 bool upb_inttable_remove(upb_inttable* t, uintptr_t key, upb_value* val) {
692   bool success;
693   if (key < t->array_size) {
694     if (upb_arrhas(t->array[key])) {
695       upb_tabval empty = UPB_TABVALUE_EMPTY_INIT;
696       t->array_count--;
697       if (val) {
698         _upb_value_setval(val, t->array[key].val);
699       }
700       mutable_array(t)[key] = empty;
701       success = true;
702     } else {
703       success = false;
704     }
705   } else {
706     success = rm(&t->t, intkey(key), val, NULL, upb_inthash(key), &inteql);
707   }
708   check(t);
709   return success;
710 }
711 
upb_inttable_compact(upb_inttable * t,upb_Arena * a)712 void upb_inttable_compact(upb_inttable* t, upb_Arena* a) {
713   /* A power-of-two histogram of the table keys. */
714   size_t counts[UPB_MAXARRSIZE + 1] = {0};
715 
716   /* The max key in each bucket. */
717   uintptr_t max[UPB_MAXARRSIZE + 1] = {0};
718 
719   {
720     intptr_t iter = UPB_INTTABLE_BEGIN;
721     uintptr_t key;
722     upb_value val;
723     while (upb_inttable_next(t, &key, &val, &iter)) {
724       int bucket = log2ceil(key);
725       max[bucket] = UPB_MAX(max[bucket], key);
726       counts[bucket]++;
727     }
728   }
729 
730   /* Find the largest power of two that satisfies the MIN_DENSITY
731    * definition (while actually having some keys). */
732   size_t arr_count = upb_inttable_count(t);
733   int size_lg2;
734   upb_inttable new_t;
735 
736   for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 0; size_lg2--) {
737     if (counts[size_lg2] == 0) {
738       /* We can halve again without losing any entries. */
739       continue;
740     } else if (arr_count >= (1 << size_lg2) * MIN_DENSITY) {
741       break;
742     }
743 
744     arr_count -= counts[size_lg2];
745   }
746 
747   UPB_ASSERT(arr_count <= upb_inttable_count(t));
748 
749   {
750     /* Insert all elements into new, perfectly-sized table. */
751     size_t arr_size = max[size_lg2] + 1; /* +1 so arr[max] will fit. */
752     size_t hash_count = upb_inttable_count(t) - arr_count;
753     size_t hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0;
754     int hashsize_lg2 = log2ceil(hash_size);
755 
756     upb_inttable_sizedinit(&new_t, arr_size, hashsize_lg2, a);
757 
758     {
759       intptr_t iter = UPB_INTTABLE_BEGIN;
760       uintptr_t key;
761       upb_value val;
762       while (upb_inttable_next(t, &key, &val, &iter)) {
763         upb_inttable_insert(&new_t, key, val, a);
764       }
765     }
766 
767     UPB_ASSERT(new_t.array_size == arr_size);
768     UPB_ASSERT(new_t.t.size_lg2 == hashsize_lg2);
769   }
770   *t = new_t;
771 }
772 
773 // Iteration.
774 
upb_inttable_next(const upb_inttable * t,uintptr_t * key,upb_value * val,intptr_t * iter)775 bool upb_inttable_next(const upb_inttable* t, uintptr_t* key, upb_value* val,
776                        intptr_t* iter) {
777   intptr_t i = *iter;
778   if ((size_t)(i + 1) <= t->array_size) {
779     while ((size_t)++i < t->array_size) {
780       upb_tabval ent = t->array[i];
781       if (upb_arrhas(ent)) {
782         *key = i;
783         *val = _upb_value_val(ent.val);
784         *iter = i;
785         return true;
786       }
787     }
788     i--;  // Back up to exactly one position before the start of the table.
789   }
790 
791   size_t tab_idx = next(&t->t, i - t->array_size);
792   if (tab_idx < upb_table_size(&t->t)) {
793     upb_tabent* ent = &t->t.entries[tab_idx];
794     *key = ent->key;
795     *val = _upb_value_val(ent->val.val);
796     *iter = tab_idx + t->array_size;
797     return true;
798   }
799 
800   return false;
801 }
802 
upb_inttable_removeiter(upb_inttable * t,intptr_t * iter)803 void upb_inttable_removeiter(upb_inttable* t, intptr_t* iter) {
804   intptr_t i = *iter;
805   if ((size_t)i < t->array_size) {
806     t->array_count--;
807     mutable_array(t)[i].val = -1;
808   } else {
809     upb_tabent* ent = &t->t.entries[i - t->array_size];
810     upb_tabent* prev = NULL;
811 
812     // Linear search, not great.
813     upb_tabent* end = &t->t.entries[upb_table_size(&t->t)];
814     for (upb_tabent* e = t->t.entries; e != end; e++) {
815       if (e->next == ent) {
816         prev = e;
817         break;
818       }
819     }
820 
821     if (prev) {
822       prev->next = ent->next;
823     }
824 
825     t->t.count--;
826     ent->key = 0;
827     ent->next = NULL;
828   }
829 }
830 
upb_strtable_next2(const upb_strtable * t,upb_StringView * key,upb_value * val,intptr_t * iter)831 bool upb_strtable_next2(const upb_strtable* t, upb_StringView* key,
832                         upb_value* val, intptr_t* iter) {
833   size_t tab_idx = next(&t->t, *iter);
834   if (tab_idx < upb_table_size(&t->t)) {
835     upb_tabent* ent = &t->t.entries[tab_idx];
836     uint32_t len;
837     key->data = upb_tabstr(ent->key, &len);
838     key->size = len;
839     *val = _upb_value_val(ent->val.val);
840     *iter = tab_idx;
841     return true;
842   }
843 
844   return false;
845 }
846 
upb_strtable_removeiter(upb_strtable * t,intptr_t * iter)847 void upb_strtable_removeiter(upb_strtable* t, intptr_t* iter) {
848   intptr_t i = *iter;
849   upb_tabent* ent = &t->t.entries[i];
850   upb_tabent* prev = NULL;
851 
852   // Linear search, not great.
853   upb_tabent* end = &t->t.entries[upb_table_size(&t->t)];
854   for (upb_tabent* e = t->t.entries; e != end; e++) {
855     if (e->next == ent) {
856       prev = e;
857       break;
858     }
859   }
860 
861   if (prev) {
862     prev->next = ent->next;
863   }
864 
865   t->t.count--;
866   ent->key = 0;
867   ent->next = NULL;
868 }
869