// Protocol Buffers - Google's data interchange format
// Copyright 2023 Google LLC.  All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd

/*
 * upb_table Implementation
 *
 * Implementation is heavily inspired by Lua's ltable.c.
 */

#include <string.h>

#include "upb/base/internal/log2.h"
#include "upb/hash/int_table.h"
#include "upb/hash/str_table.h"

// Must be last.
#include "upb/port/def.inc"

#define UPB_MAXARRSIZE 16  // 2**16 = 64k.

// From Chromium.
#define ARRAY_SIZE(x) \
  ((sizeof(x) / sizeof(0 [x])) / ((size_t)(!(sizeof(x) % sizeof(0 [x])))))
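
/* Note on the macro above: the second divisor collapses to zero whenever
 * sizeof(x) is not a multiple of the element size, so some accidental uses of
 * ARRAY_SIZE on a pointer (rather than a true array) fail to compile with a
 * division-by-zero error. */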

static const double MAX_LOAD = 0.85;

/* The minimum utilization of the array part of a mixed hash/array table.  This
 * is a speed/memory-usage tradeoff (though it's not straightforward because of
 * cache effects).  The lower this is, the more memory we'll use. */
static const double MIN_DENSITY = 0.1;
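
/* For example (illustrative numbers): at MIN_DENSITY 0.1, a 256-slot array
 * part is kept by upb_inttable_compact() only if at least ~26 of the table's
 * keys are small enough to live in it; otherwise the array part is shrunk and
 * the sparse keys are pushed to the hash part. */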

static bool is_pow2(uint64_t v) { return v == 0 || (v & (v - 1)) == 0; }

static upb_value _upb_value_val(uint64_t val) {
  upb_value ret;
  _upb_value_setval(&ret, val);
  return ret;
}

static int log2ceil(uint64_t v) {
  int ret = 0;
  bool pow2 = is_pow2(v);
  while (v >>= 1) ret++;
  ret = pow2 ? ret : ret + 1;  // Ceiling.
  return UPB_MIN(UPB_MAXARRSIZE, ret);
}
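
/* Examples: log2ceil(1) == 0, log2ceil(5) == 3, log2ceil(64) == 6, and any
 * argument above 2^16 is clamped to UPB_MAXARRSIZE (16). */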

/* A type to represent the lookup key of either a strtable or an inttable. */
typedef union {
  uintptr_t num;
  struct {
    const char* str;
    size_t len;
  } str;
} lookupkey_t;

static lookupkey_t strkey2(const char* str, size_t len) {
  lookupkey_t k;
  k.str.str = str;
  k.str.len = len;
  return k;
}

static lookupkey_t intkey(uintptr_t key) {
  lookupkey_t k;
  k.num = key;
  return k;
}

typedef uint32_t hashfunc_t(upb_tabkey key);
typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2);

/* Base table (shared code) ***************************************************/

static uint32_t upb_inthash(uintptr_t key) { return (uint32_t)key; }

static const upb_tabent* upb_getentry(const upb_table* t, uint32_t hash) {
  return t->entries + (hash & t->mask);
}

static bool upb_arrhas(upb_tabval key) { return key.val != (uint64_t)-1; }

static bool isfull(upb_table* t) { return t->count == t->max_count; }

static bool init(upb_table* t, uint8_t size_lg2, upb_Arena* a) {
  size_t bytes;

  t->count = 0;
  t->size_lg2 = size_lg2;
  t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0;
  t->max_count = upb_table_size(t) * MAX_LOAD;
  bytes = upb_table_size(t) * sizeof(upb_tabent);
  if (bytes > 0) {
    t->entries = upb_Arena_Malloc(a, bytes);
    if (!t->entries) return false;
    memset(t->entries, 0, bytes);
  } else {
    t->entries = NULL;
  }
  return true;
}

/* Returns the next empty entry after |e|, scanning forward and wrapping
 * around to the beginning of the table.  The table must not be full. */
static upb_tabent* emptyent(upb_table* t, upb_tabent* e) {
  upb_tabent* begin = t->entries;
  upb_tabent* end = begin + upb_table_size(t);
  for (e = e + 1; e < end; e++) {
    if (upb_tabent_isempty(e)) return e;
  }
  for (e = begin; e < end; e++) {
    if (upb_tabent_isempty(e)) return e;
  }
  UPB_ASSERT(false);
  return NULL;
}

static upb_tabent* getentry_mutable(upb_table* t, uint32_t hash) {
  return (upb_tabent*)upb_getentry(t, hash);
}

static const upb_tabent* findentry(const upb_table* t, lookupkey_t key,
                                   uint32_t hash, eqlfunc_t* eql) {
  const upb_tabent* e;

  if (t->size_lg2 == 0) return NULL;
  e = upb_getentry(t, hash);
  if (upb_tabent_isempty(e)) return NULL;
  while (1) {
    if (eql(e->key, key)) return e;
    if ((e = e->next) == NULL) return NULL;
  }
}

static upb_tabent* findentry_mutable(upb_table* t, lookupkey_t key,
                                     uint32_t hash, eqlfunc_t* eql) {
  return (upb_tabent*)findentry(t, key, hash, eql);
}

static bool lookup(const upb_table* t, lookupkey_t key, upb_value* v,
                   uint32_t hash, eqlfunc_t* eql) {
  const upb_tabent* e = findentry(t, key, hash, eql);
  if (e) {
    if (v) {
      _upb_value_setval(v, e->val.val);
    }
    return true;
  } else {
    return false;
  }
}

/* The given key must not already exist in the table. */
static void insert(upb_table* t, lookupkey_t key, upb_tabkey tabkey,
                   upb_value val, uint32_t hash, hashfunc_t* hashfunc,
                   eqlfunc_t* eql) {
  upb_tabent* mainpos_e;
  upb_tabent* our_e;

  UPB_ASSERT(findentry(t, key, hash, eql) == NULL);

  t->count++;
  mainpos_e = getentry_mutable(t, hash);
  our_e = mainpos_e;

  if (upb_tabent_isempty(mainpos_e)) {
    /* Our main position is empty; use it. */
    our_e->next = NULL;
  } else {
    /* Collision. */
    upb_tabent* new_e = emptyent(t, mainpos_e);
    /* Head of collider's chain. */
    upb_tabent* chain = getentry_mutable(t, hashfunc(mainpos_e->key));
    if (chain == mainpos_e) {
      /* Existing ent is in its main position (it has the same hash as us, and
       * is the head of our chain).  Insert to new ent and append to this chain.
       */
      new_e->next = mainpos_e->next;
      mainpos_e->next = new_e;
      our_e = new_e;
    } else {
      /* Existing ent is not in its main position (it is a node in some other
       * chain).  This implies that no existing ent in the table has our hash.
       * Evict it (updating its chain) and use its ent for head of our chain. */
      *new_e = *mainpos_e; /* copies next. */
      while (chain->next != mainpos_e) {
        chain = (upb_tabent*)chain->next;
        UPB_ASSERT(chain);
      }
      chain->next = new_e;
      our_e = mainpos_e;
      our_e->next = NULL;
    }
  }
  our_e->key = tabkey;
  our_e->val.val = val.val;
  UPB_ASSERT(findentry(t, key, hash, eql) == our_e);
}
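
/* Illustrative walk-through of the collision handling above: suppose keys A
 * and B both hash to slot 3.  A is inserted first and occupies slot 3 (its
 * main position) with next == NULL.  When B arrives, slot 3 is held by an
 * entry whose own hash maps there, so B is written into a free slot found by
 * emptyent() and linked onto A's chain.  If instead slot 3 had held a
 * displaced node of some *other* chain, that node would be evicted to a free
 * slot, its chain re-linked, and B would take slot 3 as the head of a new
 * chain. */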

static bool rm(upb_table* t, lookupkey_t key, upb_value* val,
               upb_tabkey* removed, uint32_t hash, eqlfunc_t* eql) {
  upb_tabent* chain = getentry_mutable(t, hash);
  if (upb_tabent_isempty(chain)) return false;
  if (eql(chain->key, key)) {
    /* Element to remove is at the head of its chain. */
    t->count--;
    if (val) _upb_value_setval(val, chain->val.val);
    if (removed) *removed = chain->key;
    if (chain->next) {
      upb_tabent* move = (upb_tabent*)chain->next;
      *chain = *move;
      move->key = 0; /* Make the slot empty. */
    } else {
      chain->key = 0; /* Make the slot empty. */
    }
    return true;
  } else {
    /* Element to remove is either in a non-head position or not in the
     * table. */
    while (chain->next && !eql(chain->next->key, key)) {
      chain = (upb_tabent*)chain->next;
    }
    if (chain->next) {
      /* Found element to remove. */
      upb_tabent* rm = (upb_tabent*)chain->next;
      t->count--;
      if (val) _upb_value_setval(val, chain->next->val.val);
      if (removed) *removed = rm->key;
      rm->key = 0; /* Make the slot empty. */
      chain->next = rm->next;
      return true;
    } else {
      /* Element to remove is not in the table. */
      return false;
    }
  }
}

static size_t next(const upb_table* t, size_t i) {
  do {
    if (++i >= upb_table_size(t)) return SIZE_MAX - 1; /* Distinct from -1. */
  } while (upb_tabent_isempty(&t->entries[i]));

  return i;
}

static size_t begin(const upb_table* t) { return next(t, -1); }

/* upb_strtable ***************************************************************/

/* A simple "subclass" of upb_table that only adds a hash function for strings.
 */
/* Copies the string into an arena allocation laid out as
 * [uint32_t length][bytes][NUL], the layout that upb_tabstr() reads back.
 * Returns 0 if allocation fails. */
static upb_tabkey strcopy(lookupkey_t k2, upb_Arena* a) {
  uint32_t len = (uint32_t)k2.str.len;
  char* str = upb_Arena_Malloc(a, k2.str.len + sizeof(uint32_t) + 1);
  if (str == NULL) return 0;
  memcpy(str, &len, sizeof(uint32_t));
  if (k2.str.len) memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len);
  str[sizeof(uint32_t) + k2.str.len] = '\0';
  return (uintptr_t)str;
}

/* Adapted from ABSL's wyhash. */

static uint64_t UnalignedLoad64(const void* p) {
  uint64_t val;
  memcpy(&val, p, 8);
  return val;
}

static uint32_t UnalignedLoad32(const void* p) {
  uint32_t val;
  memcpy(&val, p, 4);
  return val;
}

#if defined(_MSC_VER) && defined(_M_X64)
#include <intrin.h>
#endif

/* Computes a * b, returning the low 64 bits of the result and storing the high
 * 64 bits in |*high|. */
static uint64_t upb_umul128(uint64_t v0, uint64_t v1, uint64_t* out_high) {
#ifdef __SIZEOF_INT128__
  __uint128_t p = v0;
  p *= v1;
  *out_high = (uint64_t)(p >> 64);
  return (uint64_t)p;
#elif defined(_MSC_VER) && defined(_M_X64)
  return _umul128(v0, v1, out_high);
#else
  uint64_t a32 = v0 >> 32;
  uint64_t a00 = v0 & 0xffffffff;
  uint64_t b32 = v1 >> 32;
  uint64_t b00 = v1 & 0xffffffff;
  uint64_t high = a32 * b32;
  uint64_t low = a00 * b00;
  uint64_t mid1 = a32 * b00;
  uint64_t mid2 = a00 * b32;
  low += (mid1 << 32) + (mid2 << 32);
  // Omit carry bit, for mixing we do not care about exact numerical precision.
  high += (mid1 >> 32) + (mid2 >> 32);
  *out_high = high;
  return low;
#endif
}
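
/* The portable branch above splits each factor into 32-bit halves:
 *   (a32 * 2^32 + a00) * (b32 * 2^32 + b00)
 *     = a32*b32 * 2^64 + (a32*b00 + a00*b32) * 2^32 + a00*b00.
 * The carry out of the low 64 bits is deliberately dropped, since the result
 * only feeds a hash mix where exact precision does not matter. */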

static uint64_t WyhashMix(uint64_t v0, uint64_t v1) {
  uint64_t high;
  uint64_t low = upb_umul128(v0, v1, &high);
  return low ^ high;
}

static uint64_t Wyhash(const void* data, size_t len, uint64_t seed,
                       const uint64_t salt[]) {
  const uint8_t* ptr = (const uint8_t*)data;
  uint64_t starting_length = (uint64_t)len;
  uint64_t current_state = seed ^ salt[0];

  if (len > 64) {
    // If we have more than 64 bytes, we're going to handle chunks of 64
    // bytes at a time. We're going to build up two separate hash states
    // which we will then hash together.
    uint64_t duplicated_state = current_state;

    do {
      uint64_t a = UnalignedLoad64(ptr);
      uint64_t b = UnalignedLoad64(ptr + 8);
      uint64_t c = UnalignedLoad64(ptr + 16);
      uint64_t d = UnalignedLoad64(ptr + 24);
      uint64_t e = UnalignedLoad64(ptr + 32);
      uint64_t f = UnalignedLoad64(ptr + 40);
      uint64_t g = UnalignedLoad64(ptr + 48);
      uint64_t h = UnalignedLoad64(ptr + 56);

      uint64_t cs0 = WyhashMix(a ^ salt[1], b ^ current_state);
      uint64_t cs1 = WyhashMix(c ^ salt[2], d ^ current_state);
      current_state = (cs0 ^ cs1);

      uint64_t ds0 = WyhashMix(e ^ salt[3], f ^ duplicated_state);
      uint64_t ds1 = WyhashMix(g ^ salt[4], h ^ duplicated_state);
      duplicated_state = (ds0 ^ ds1);

      ptr += 64;
      len -= 64;
    } while (len > 64);

    current_state = current_state ^ duplicated_state;
  }

  // We now have a data `ptr` with at most 64 bytes and the current state
  // of the hashing state machine stored in current_state.
  while (len > 16) {
    uint64_t a = UnalignedLoad64(ptr);
    uint64_t b = UnalignedLoad64(ptr + 8);

    current_state = WyhashMix(a ^ salt[1], b ^ current_state);

    ptr += 16;
    len -= 16;
  }

  // We now have a data `ptr` with at most 16 bytes.
  uint64_t a = 0;
  uint64_t b = 0;
  if (len > 8) {
    // When we have at least 9 and at most 16 bytes, set A to the first 64
    // bits of the input and B to the last 64 bits of the input. Yes, they will
    // overlap in the middle if we are working with less than the full 16
    // bytes.
    a = UnalignedLoad64(ptr);
    b = UnalignedLoad64(ptr + len - 8);
  } else if (len > 3) {
    // If we have at least 4 and at most 8 bytes, set A to the first 32
    // bits and B to the last 32 bits.
    a = UnalignedLoad32(ptr);
    b = UnalignedLoad32(ptr + len - 4);
  } else if (len > 0) {
    // If we have at least 1 and at most 3 bytes, read all of the provided
    // bits into A, with some adjustments.
    a = ((ptr[0] << 16) | (ptr[len >> 1] << 8) | ptr[len - 1]);
    b = 0;
  } else {
    a = 0;
    b = 0;
  }

  uint64_t w = WyhashMix(a ^ salt[1], b ^ current_state);
  uint64_t z = salt[1] ^ starting_length;
  return WyhashMix(w, z);
}
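
/* Summary of the structure above: inputs longer than 64 bytes are consumed in
 * 64-byte chunks folded into two parallel states that are finally XORed
 * together; the remainder is consumed in 16-byte chunks, and the last 0-16
 * bytes are packed (possibly overlapping) into `a` and `b` before two closing
 * WyhashMix rounds that also fold in the original length. */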

const uint64_t kWyhashSalt[5] = {
    0x243F6A8885A308D3ULL, 0x13198A2E03707344ULL, 0xA4093822299F31D0ULL,
    0x082EFA98EC4E6C89ULL, 0x452821E638D01377ULL,
};

uint32_t _upb_Hash(const void* p, size_t n, uint64_t seed) {
  return Wyhash(p, n, seed, kWyhashSalt);
}

static uint32_t _upb_Hash_NoSeed(const char* p, size_t n) {
  return _upb_Hash(p, n, 0);
}

static uint32_t strhash(upb_tabkey key) {
  uint32_t len;
  char* str = upb_tabstr(key, &len);
  return _upb_Hash_NoSeed(str, len);
}

static bool streql(upb_tabkey k1, lookupkey_t k2) {
  uint32_t len;
  char* str = upb_tabstr(k1, &len);
  return len == k2.str.len && (len == 0 || memcmp(str, k2.str.str, len) == 0);
}

bool upb_strtable_init(upb_strtable* t, size_t expected_size, upb_Arena* a) {
  // Multiply by approximate reciprocal of MAX_LOAD (0.85), with pow2
  // denominator.
  size_t need_entries = (expected_size + 1) * 1204 / 1024;
  UPB_ASSERT(need_entries >= expected_size * 0.85);
  int size_lg2 = upb_Log2Ceiling(need_entries);
  return init(&t->t, size_lg2, a);
}
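
/* Worked example of the sizing above (illustrative numbers): expected_size of
 * 100 gives need_entries = 101 * 1204 / 1024 = 118, upb_Log2Ceiling(118) = 7,
 * so the table gets 128 entries and max_count = 128 * 0.85 = 108, comfortably
 * covering the 100 requested. */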

void upb_strtable_clear(upb_strtable* t) {
  size_t bytes = upb_table_size(&t->t) * sizeof(upb_tabent);
  t->t.count = 0;
  memset((char*)t->t.entries, 0, bytes);
}

bool upb_strtable_resize(upb_strtable* t, size_t size_lg2, upb_Arena* a) {
  upb_strtable new_table;
  if (!init(&new_table.t, size_lg2, a)) return false;

  intptr_t iter = UPB_STRTABLE_BEGIN;
  upb_StringView key;
  upb_value val;
  while (upb_strtable_next2(t, &key, &val, &iter)) {
    upb_strtable_insert(&new_table, key.data, key.size, val, a);
  }
  *t = new_table;
  return true;
}

bool upb_strtable_insert(upb_strtable* t, const char* k, size_t len,
                         upb_value v, upb_Arena* a) {
  lookupkey_t key;
  upb_tabkey tabkey;
  uint32_t hash;

  if (isfull(&t->t)) {
    /* Need to resize.  New table of double the size, add old elements to it. */
    if (!upb_strtable_resize(t, t->t.size_lg2 + 1, a)) {
      return false;
    }
  }

  key = strkey2(k, len);
  tabkey = strcopy(key, a);
  if (tabkey == 0) return false;

  hash = _upb_Hash_NoSeed(key.str.str, key.str.len);
  insert(&t->t, key, tabkey, v, hash, &strhash, &streql);
  return true;
}

bool upb_strtable_lookup2(const upb_strtable* t, const char* key, size_t len,
                          upb_value* v) {
  uint32_t hash = _upb_Hash_NoSeed(key, len);
  return lookup(&t->t, strkey2(key, len), v, hash, &streql);
}

bool upb_strtable_remove2(upb_strtable* t, const char* key, size_t len,
                          upb_value* val) {
  uint32_t hash = _upb_Hash_NoSeed(key, len);
  upb_tabkey tabkey;
  return rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql);
}
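
/* Hypothetical usage sketch (illustration only; upb_value_int32() and
 * upb_value_getint32() are assumed from upb's value helpers, and `arena` is an
 * already-created upb_Arena*):
 *
 *   upb_strtable t;
 *   upb_strtable_init(&t, 16, arena);
 *   upb_strtable_insert(&t, "foo", 3, upb_value_int32(42), arena);
 *   upb_value v;
 *   if (upb_strtable_lookup2(&t, "foo", 3, &v)) {
 *     int32_t x = upb_value_getint32(v);  // x == 42
 *   }
 */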

/* Iteration */

void upb_strtable_begin(upb_strtable_iter* i, const upb_strtable* t) {
  i->t = t;
  i->index = begin(&t->t);
}

void upb_strtable_next(upb_strtable_iter* i) {
  i->index = next(&i->t->t, i->index);
}

bool upb_strtable_done(const upb_strtable_iter* i) {
  if (!i->t) return true;
  return i->index >= upb_table_size(&i->t->t) ||
         upb_tabent_isempty(str_tabent(i));
}

upb_StringView upb_strtable_iter_key(const upb_strtable_iter* i) {
  upb_StringView key;
  uint32_t len;
  UPB_ASSERT(!upb_strtable_done(i));
  key.data = upb_tabstr(str_tabent(i)->key, &len);
  key.size = len;
  return key;
}

upb_value upb_strtable_iter_value(const upb_strtable_iter* i) {
  UPB_ASSERT(!upb_strtable_done(i));
  return _upb_value_val(str_tabent(i)->val.val);
}

void upb_strtable_iter_setdone(upb_strtable_iter* i) {
  i->t = NULL;
  i->index = SIZE_MAX;
}

bool upb_strtable_iter_isequal(const upb_strtable_iter* i1,
                               const upb_strtable_iter* i2) {
  if (upb_strtable_done(i1) && upb_strtable_done(i2)) return true;
  return i1->t == i2->t && i1->index == i2->index;
}

/* upb_inttable ***************************************************************/

/* For inttables we use a hybrid structure where small keys are kept in an
 * array and large keys are put in the hash table. */
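
/* Hypothetical usage sketch (illustration only; upb_value_int32() is assumed
 * from upb's value helpers).  A freshly initialized table has an array part of
 * a single slot, so only key 0 is array-backed until upb_inttable_compact()
 * re-sizes the array part to fit the keys actually present:
 *
 *   upb_inttable t;
 *   upb_inttable_init(&t, arena);
 *   upb_inttable_insert(&t, 0, upb_value_int32(1), arena);    // array part
 *   upb_inttable_insert(&t, 500, upb_value_int32(2), arena);  // hash part
 *   upb_inttable_compact(&t, arena);  // rebuilds both parts to fit the keys
 */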

static uint32_t inthash(upb_tabkey key) { return upb_inthash(key); }

static bool inteql(upb_tabkey k1, lookupkey_t k2) { return k1 == k2.num; }

static upb_tabval* mutable_array(upb_inttable* t) {
  return (upb_tabval*)t->array;
}

static upb_tabval* inttable_val(upb_inttable* t, uintptr_t key) {
  if (key < t->array_size) {
    return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL;
  } else {
    upb_tabent* e =
        findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql);
    return e ? &e->val : NULL;
  }
}

static const upb_tabval* inttable_val_const(const upb_inttable* t,
                                            uintptr_t key) {
  return inttable_val((upb_inttable*)t, key);
}

size_t upb_inttable_count(const upb_inttable* t) {
  return t->t.count + t->array_count;
}

static void check(upb_inttable* t) {
  UPB_UNUSED(t);
#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG)
  {
    // This check is very expensive (makes inserts/deletes O(N)).
    size_t count = 0;
    intptr_t iter = UPB_INTTABLE_BEGIN;
    uintptr_t key;
    upb_value val;
    while (upb_inttable_next(t, &key, &val, &iter)) {
      count++;  // Count visited entries so the final assertion is meaningful.
      UPB_ASSERT(upb_inttable_lookup(t, key, NULL));
    }
    UPB_ASSERT(count == upb_inttable_count(t));
  }
#endif
}

bool upb_inttable_sizedinit(upb_inttable* t, size_t asize, int hsize_lg2,
                            upb_Arena* a) {
  size_t array_bytes;

  if (!init(&t->t, hsize_lg2, a)) return false;
  /* Always make the array part at least 1 long, so that we know key 0
   * won't be in the hash part, which simplifies things. */
  t->array_size = UPB_MAX(1, asize);
  t->array_count = 0;
  array_bytes = t->array_size * sizeof(upb_value);
  t->array = upb_Arena_Malloc(a, array_bytes);
  if (!t->array) {
    return false;
  }
  memset(mutable_array(t), 0xff, array_bytes);
  check(t);
  return true;
}

bool upb_inttable_init(upb_inttable* t, upb_Arena* a) {
  return upb_inttable_sizedinit(t, 0, 4, a);
}

bool upb_inttable_insert(upb_inttable* t, uintptr_t key, upb_value val,
                         upb_Arena* a) {
  upb_tabval tabval;
  tabval.val = val.val;
  UPB_ASSERT(
      upb_arrhas(tabval)); /* This will reject (uint64_t)-1.  Fix this. */

  if (key < t->array_size) {
    UPB_ASSERT(!upb_arrhas(t->array[key]));
    t->array_count++;
    mutable_array(t)[key].val = val.val;
  } else {
    if (isfull(&t->t)) {
      /* Need to resize the hash part, but we re-use the array part. */
      size_t i;
      upb_table new_table;

      if (!init(&new_table, t->t.size_lg2 + 1, a)) {
        return false;
      }

      for (i = begin(&t->t); i < upb_table_size(&t->t); i = next(&t->t, i)) {
        const upb_tabent* e = &t->t.entries[i];
        uint32_t hash;
        upb_value v;

        _upb_value_setval(&v, e->val.val);
        hash = upb_inthash(e->key);
        insert(&new_table, intkey(e->key), e->key, v, hash, &inthash, &inteql);
      }

      UPB_ASSERT(t->t.count == new_table.count);

      t->t = new_table;
    }
    insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql);
  }
  check(t);
  return true;
}

bool upb_inttable_lookup(const upb_inttable* t, uintptr_t key, upb_value* v) {
  const upb_tabval* table_v = inttable_val_const(t, key);
  if (!table_v) return false;
  if (v) _upb_value_setval(v, table_v->val);
  return true;
}

bool upb_inttable_replace(upb_inttable* t, uintptr_t key, upb_value val) {
  upb_tabval* table_v = inttable_val(t, key);
  if (!table_v) return false;
  table_v->val = val.val;
  return true;
}

bool upb_inttable_remove(upb_inttable* t, uintptr_t key, upb_value* val) {
  bool success;
  if (key < t->array_size) {
    if (upb_arrhas(t->array[key])) {
      upb_tabval empty = UPB_TABVALUE_EMPTY_INIT;
      t->array_count--;
      if (val) {
        _upb_value_setval(val, t->array[key].val);
      }
      mutable_array(t)[key] = empty;
      success = true;
    } else {
      success = false;
    }
  } else {
    success = rm(&t->t, intkey(key), val, NULL, upb_inthash(key), &inteql);
  }
  check(t);
  return success;
}

void upb_inttable_compact(upb_inttable* t, upb_Arena* a) {
  /* A power-of-two histogram of the table keys. */
  size_t counts[UPB_MAXARRSIZE + 1] = {0};

  /* The max key in each bucket. */
  uintptr_t max[UPB_MAXARRSIZE + 1] = {0};

  {
    intptr_t iter = UPB_INTTABLE_BEGIN;
    uintptr_t key;
    upb_value val;
    while (upb_inttable_next(t, &key, &val, &iter)) {
      int bucket = log2ceil(key);
      max[bucket] = UPB_MAX(max[bucket], key);
      counts[bucket]++;
    }
  }

  /* Find the largest power of two that satisfies the MIN_DENSITY
   * definition (while actually having some keys). */
  size_t arr_count = upb_inttable_count(t);
  int size_lg2;
  upb_inttable new_t;

  for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 0; size_lg2--) {
    if (counts[size_lg2] == 0) {
      /* We can halve again without losing any entries. */
      continue;
    } else if (arr_count >= (1 << size_lg2) * MIN_DENSITY) {
      break;
    }

    arr_count -= counts[size_lg2];
  }

  UPB_ASSERT(arr_count <= upb_inttable_count(t));

  {
    /* Insert all elements into new, perfectly-sized table. */
    size_t arr_size = max[size_lg2] + 1; /* +1 so arr[max] will fit. */
    size_t hash_count = upb_inttable_count(t) - arr_count;
    size_t hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0;
    int hashsize_lg2 = log2ceil(hash_size);

    upb_inttable_sizedinit(&new_t, arr_size, hashsize_lg2, a);

    {
      intptr_t iter = UPB_INTTABLE_BEGIN;
      uintptr_t key;
      upb_value val;
      while (upb_inttable_next(t, &key, &val, &iter)) {
        upb_inttable_insert(&new_t, key, val, a);
      }
    }

    UPB_ASSERT(new_t.array_size == arr_size);
    UPB_ASSERT(new_t.t.size_lg2 == hashsize_lg2);
  }
  *t = new_t;
}
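
/* Worked example of the sizing pass above (illustrative numbers): for keys
 * {1, 2, 3, 1000}, the histogram has one key each in buckets 0, 1, 2, and 10.
 * Scanning down from bucket 16, bucket 10 fails the density test
 * (4 < 1024 * 0.1), so key 1000 drops out of the array candidates; bucket 2
 * then passes (3 >= 4 * 0.1), giving an array part of max[2] + 1 = 4 slots for
 * keys 1..3 and a small hash part for key 1000. */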

// Iteration.

bool upb_inttable_next(const upb_inttable* t, uintptr_t* key, upb_value* val,
                       intptr_t* iter) {
  intptr_t i = *iter;
  if ((size_t)(i + 1) <= t->array_size) {
    while ((size_t)++i < t->array_size) {
      upb_tabval ent = t->array[i];
      if (upb_arrhas(ent)) {
        *key = i;
        *val = _upb_value_val(ent.val);
        *iter = i;
        return true;
      }
    }
    i--;  // Back up to exactly one position before the start of the table.
  }

  size_t tab_idx = next(&t->t, i - t->array_size);
  if (tab_idx < upb_table_size(&t->t)) {
    upb_tabent* ent = &t->t.entries[tab_idx];
    *key = ent->key;
    *val = _upb_value_val(ent->val.val);
    *iter = tab_idx + t->array_size;
    return true;
  }

  return false;
}

void upb_inttable_removeiter(upb_inttable* t, intptr_t* iter) {
  intptr_t i = *iter;
  if ((size_t)i < t->array_size) {
    t->array_count--;
    mutable_array(t)[i].val = -1;
  } else {
    upb_tabent* ent = &t->t.entries[i - t->array_size];
    upb_tabent* prev = NULL;

    // Linear search, not great.
    upb_tabent* end = &t->t.entries[upb_table_size(&t->t)];
    for (upb_tabent* e = t->t.entries; e != end; e++) {
      if (e->next == ent) {
        prev = e;
        break;
      }
    }

    if (prev) {
      prev->next = ent->next;
    }

    t->t.count--;
    ent->key = 0;
    ent->next = NULL;
  }
}

bool upb_strtable_next2(const upb_strtable* t, upb_StringView* key,
                        upb_value* val, intptr_t* iter) {
  size_t tab_idx = next(&t->t, *iter);
  if (tab_idx < upb_table_size(&t->t)) {
    upb_tabent* ent = &t->t.entries[tab_idx];
    uint32_t len;
    key->data = upb_tabstr(ent->key, &len);
    key->size = len;
    *val = _upb_value_val(ent->val.val);
    *iter = tab_idx;
    return true;
  }

  return false;
}

void upb_strtable_removeiter(upb_strtable* t, intptr_t* iter) {
  intptr_t i = *iter;
  upb_tabent* ent = &t->t.entries[i];
  upb_tabent* prev = NULL;

  // Linear search, not great.
  upb_tabent* end = &t->t.entries[upb_table_size(&t->t)];
  for (upb_tabent* e = t->t.entries; e != end; e++) {
    if (e->next == ent) {
      prev = e;
      break;
    }
  }

  if (prev) {
    prev->next = ent->next;
  }

  t->t.count--;
  ent->key = 0;
  ent->next = NULL;
}

void upb_strtable_setentryvalue(upb_strtable* t, intptr_t iter, upb_value v) {
  upb_tabent* ent = &t->t.entries[iter];
  ent->val.val = v.val;
}
837