// Protocol Buffers - Google's data interchange format
// Copyright 2023 Google LLC. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd

/*
 * upb_table Implementation
 *
 * Implementation is heavily inspired by Lua's ltable.c.
 */

#include <string.h>

#include "upb/base/internal/log2.h"
#include "upb/hash/int_table.h"
#include "upb/hash/str_table.h"

// Must be last.
#include "upb/port/def.inc"

#define UPB_MAXARRSIZE 16  // 2**16 = 64k.

// From Chromium.
#define ARRAY_SIZE(x) \
  ((sizeof(x) / sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x])))))

static const double MAX_LOAD = 0.85;

/* The minimum utilization of the array part of a mixed hash/array table. This
 * is a speed/memory-usage tradeoff (though it's not straightforward because of
 * cache effects). The lower this is, the more memory we'll use. */
static const double MIN_DENSITY = 0.1;

static bool is_pow2(uint64_t v) { return v == 0 || (v & (v - 1)) == 0; }

static upb_value _upb_value_val(uint64_t val) {
  upb_value ret;
  _upb_value_setval(&ret, val);
  return ret;
}

static int log2ceil(uint64_t v) {
  int ret = 0;
  bool pow2 = is_pow2(v);
  while (v >>= 1) ret++;
  ret = pow2 ? ret : ret + 1;  // Ceiling.
  return UPB_MIN(UPB_MAXARRSIZE, ret);
}

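/* Informal examples of log2ceil(), for orientation only:
 *   log2ceil(0) == 0, log2ceil(1) == 0, log2ceil(5) == 3, log2ceil(8) == 3,
 *   log2ceil(1 << 20) == UPB_MAXARRSIZE (16), since results are clamped so the
 *   array part of an inttable never exceeds 2**UPB_MAXARRSIZE slots. */
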
/* A type to represent the lookup key of either a strtable or an inttable. */
typedef union {
  uintptr_t num;
  struct {
    const char* str;
    size_t len;
  } str;
} lookupkey_t;

static lookupkey_t strkey2(const char* str, size_t len) {
  lookupkey_t k;
  k.str.str = str;
  k.str.len = len;
  return k;
}

static lookupkey_t intkey(uintptr_t key) {
  lookupkey_t k;
  k.num = key;
  return k;
}

typedef uint32_t hashfunc_t(upb_tabkey key);
typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2);

/* Base table (shared code) ***************************************************/

static uint32_t upb_inthash(uintptr_t key) { return (uint32_t)key; }

static const upb_tabent* upb_getentry(const upb_table* t, uint32_t hash) {
  return t->entries + (hash & t->mask);
}

static bool upb_arrhas(upb_tabval key) { return key.val != (uint64_t)-1; }

static bool isfull(upb_table* t) { return t->count == t->max_count; }

static bool init(upb_table* t, uint8_t size_lg2, upb_Arena* a) {
  size_t bytes;

  t->count = 0;
  t->size_lg2 = size_lg2;
  t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0;
  t->max_count = upb_table_size(t) * MAX_LOAD;
  bytes = upb_table_size(t) * sizeof(upb_tabent);
  if (bytes > 0) {
    t->entries = upb_Arena_Malloc(a, bytes);
    if (!t->entries) return false;
    memset(t->entries, 0, bytes);
  } else {
    t->entries = NULL;
  }
  return true;
}

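/* Rough sizing example (illustrative only): init() with size_lg2 == 4 creates
 * a hash part of 16 entries with mask == 15 and max_count == 16 * MAX_LOAD ==
 * 13, so the 14th insert into the hash part triggers a doubling resize. */
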
static upb_tabent* emptyent(upb_table* t, upb_tabent* e) {
  upb_tabent* begin = t->entries;
  upb_tabent* end = begin + upb_table_size(t);
  for (e = e + 1; e < end; e++) {
    if (upb_tabent_isempty(e)) return e;
  }
  for (e = begin; e < end; e++) {
    if (upb_tabent_isempty(e)) return e;
  }
  UPB_ASSERT(false);
  return NULL;
}

static upb_tabent* getentry_mutable(upb_table* t, uint32_t hash) {
  return (upb_tabent*)upb_getentry(t, hash);
}

static const upb_tabent* findentry(const upb_table* t, lookupkey_t key,
                                   uint32_t hash, eqlfunc_t* eql) {
  const upb_tabent* e;

  if (t->size_lg2 == 0) return NULL;
  e = upb_getentry(t, hash);
  if (upb_tabent_isempty(e)) return NULL;
  while (1) {
    if (eql(e->key, key)) return e;
    if ((e = e->next) == NULL) return NULL;
  }
}

static upb_tabent* findentry_mutable(upb_table* t, lookupkey_t key,
                                     uint32_t hash, eqlfunc_t* eql) {
  return (upb_tabent*)findentry(t, key, hash, eql);
}

static bool lookup(const upb_table* t, lookupkey_t key, upb_value* v,
                   uint32_t hash, eqlfunc_t* eql) {
  const upb_tabent* e = findentry(t, key, hash, eql);
  if (e) {
    if (v) {
      _upb_value_setval(v, e->val.val);
    }
    return true;
  } else {
    return false;
  }
}

/* The given key must not already exist in the table. */
static void insert(upb_table* t, lookupkey_t key, upb_tabkey tabkey,
                   upb_value val, uint32_t hash, hashfunc_t* hashfunc,
                   eqlfunc_t* eql) {
  upb_tabent* mainpos_e;
  upb_tabent* our_e;

  UPB_ASSERT(findentry(t, key, hash, eql) == NULL);

  t->count++;
  mainpos_e = getentry_mutable(t, hash);
  our_e = mainpos_e;

  if (upb_tabent_isempty(mainpos_e)) {
    /* Our main position is empty; use it. */
    our_e->next = NULL;
  } else {
    /* Collision. */
    upb_tabent* new_e = emptyent(t, mainpos_e);
    /* Head of collider's chain. */
    upb_tabent* chain = getentry_mutable(t, hashfunc(mainpos_e->key));
    if (chain == mainpos_e) {
      /* Existing ent is in its main position (it has the same hash as us, and
       * is the head of our chain). Insert to new ent and append to this chain.
       */
      new_e->next = mainpos_e->next;
      mainpos_e->next = new_e;
      our_e = new_e;
    } else {
      /* Existing ent is not in its main position (it is a node in some other
       * chain). This implies that no existing ent in the table has our hash.
       * Evict it (updating its chain) and use its ent for head of our chain. */
      *new_e = *mainpos_e; /* copies next. */
      while (chain->next != mainpos_e) {
        chain = (upb_tabent*)chain->next;
        UPB_ASSERT(chain);
      }
      chain->next = new_e;
      our_e = mainpos_e;
      our_e->next = NULL;
    }
  }
  our_e->key = tabkey;
  our_e->val.val = val.val;
  UPB_ASSERT(findentry(t, key, hash, eql) == our_e);
}

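/* Informal summary of the collision strategy above (mirroring Lua's ltable.c):
 * every key hashes to a "main position". If that slot is held by a key whose
 * own main position is the same slot, the new entry is spliced into that
 * slot's chain using any free slot. If the occupant actually belongs to a
 * different chain, it is evicted into a free slot (and its chain is patched)
 * so the new key can claim its main position. Chains therefore only ever link
 * keys that share a main position, which keeps lookups short. */
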
static bool rm(upb_table* t, lookupkey_t key, upb_value* val,
               upb_tabkey* removed, uint32_t hash, eqlfunc_t* eql) {
  upb_tabent* chain = getentry_mutable(t, hash);
  if (upb_tabent_isempty(chain)) return false;
  if (eql(chain->key, key)) {
    /* Element to remove is at the head of its chain. */
    t->count--;
    if (val) _upb_value_setval(val, chain->val.val);
    if (removed) *removed = chain->key;
    if (chain->next) {
      upb_tabent* move = (upb_tabent*)chain->next;
      *chain = *move;
      move->key = 0; /* Make the slot empty. */
    } else {
      chain->key = 0; /* Make the slot empty. */
    }
    return true;
  } else {
    /* Element to remove is either in a non-head position or not in the
     * table. */
    while (chain->next && !eql(chain->next->key, key)) {
      chain = (upb_tabent*)chain->next;
    }
    if (chain->next) {
      /* Found element to remove. */
      upb_tabent* rm = (upb_tabent*)chain->next;
      t->count--;
      if (val) _upb_value_setval(val, chain->next->val.val);
      if (removed) *removed = rm->key;
      rm->key = 0; /* Make the slot empty. */
      chain->next = rm->next;
      return true;
    } else {
      /* Element to remove is not in the table. */
      return false;
    }
  }
}

static size_t next(const upb_table* t, size_t i) {
  do {
    if (++i >= upb_table_size(t)) return SIZE_MAX - 1; /* Distinct from -1. */
  } while (upb_tabent_isempty(&t->entries[i]));

  return i;
}

static size_t begin(const upb_table* t) { return next(t, -1); }

/* upb_strtable ***************************************************************/

/* A simple "subclass" of upb_table that only adds a hash function for strings.
 */

static upb_tabkey strcopy(lookupkey_t k2, upb_Arena* a) {
  uint32_t len = (uint32_t)k2.str.len;
  char* str = upb_Arena_Malloc(a, k2.str.len + sizeof(uint32_t) + 1);
  if (str == NULL) return 0;
  memcpy(str, &len, sizeof(uint32_t));
  if (k2.str.len) memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len);
  str[sizeof(uint32_t) + k2.str.len] = '\0';
  return (uintptr_t)str;
}

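/* For reference, the arena block allocated by strcopy() above is laid out as:
 *
 *   [ uint32_t length ][ length bytes of key data ][ '\0' ]
 *
 * and the returned upb_tabkey is simply a pointer to the start of this block,
 * which is how upb_tabstr() can later recover both the bytes and the length. */
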
/* Adapted from ABSL's wyhash. */

static uint64_t UnalignedLoad64(const void* p) {
  uint64_t val;
  memcpy(&val, p, 8);
  return val;
}

static uint32_t UnalignedLoad32(const void* p) {
  uint32_t val;
  memcpy(&val, p, 4);
  return val;
}

#if defined(_MSC_VER) && defined(_M_X64)
#include <intrin.h>
#endif

/* Computes a * b, returning the low 64 bits of the result and storing the high
 * 64 bits in |*high|. */
static uint64_t upb_umul128(uint64_t v0, uint64_t v1, uint64_t* out_high) {
#ifdef __SIZEOF_INT128__
  __uint128_t p = v0;
  p *= v1;
  *out_high = (uint64_t)(p >> 64);
  return (uint64_t)p;
#elif defined(_MSC_VER) && defined(_M_X64)
  return _umul128(v0, v1, out_high);
#else
  uint64_t a32 = v0 >> 32;
  uint64_t a00 = v0 & 0xffffffff;
  uint64_t b32 = v1 >> 32;
  uint64_t b00 = v1 & 0xffffffff;
  uint64_t high = a32 * b32;
  uint64_t low = a00 * b00;
  uint64_t mid1 = a32 * b00;
  uint64_t mid2 = a00 * b32;
  low += (mid1 << 32) + (mid2 << 32);
  // Omit carry bit, for mixing we do not care about exact numerical precision.
  high += (mid1 >> 32) + (mid2 >> 32);
  *out_high = high;
  return low;
#endif
}

static uint64_t WyhashMix(uint64_t v0, uint64_t v1) {
  uint64_t high;
  uint64_t low = upb_umul128(v0, v1, &high);
  return low ^ high;
}

static uint64_t Wyhash(const void* data, size_t len, uint64_t seed,
                       const uint64_t salt[]) {
  const uint8_t* ptr = (const uint8_t*)data;
  uint64_t starting_length = (uint64_t)len;
  uint64_t current_state = seed ^ salt[0];

  if (len > 64) {
    // If we have more than 64 bytes, we're going to handle chunks of 64
    // bytes at a time. We're going to build up two separate hash states
    // which we will then hash together.
    uint64_t duplicated_state = current_state;

    do {
      uint64_t a = UnalignedLoad64(ptr);
      uint64_t b = UnalignedLoad64(ptr + 8);
      uint64_t c = UnalignedLoad64(ptr + 16);
      uint64_t d = UnalignedLoad64(ptr + 24);
      uint64_t e = UnalignedLoad64(ptr + 32);
      uint64_t f = UnalignedLoad64(ptr + 40);
      uint64_t g = UnalignedLoad64(ptr + 48);
      uint64_t h = UnalignedLoad64(ptr + 56);

      uint64_t cs0 = WyhashMix(a ^ salt[1], b ^ current_state);
      uint64_t cs1 = WyhashMix(c ^ salt[2], d ^ current_state);
      current_state = (cs0 ^ cs1);

      uint64_t ds0 = WyhashMix(e ^ salt[3], f ^ duplicated_state);
      uint64_t ds1 = WyhashMix(g ^ salt[4], h ^ duplicated_state);
      duplicated_state = (ds0 ^ ds1);

      ptr += 64;
      len -= 64;
    } while (len > 64);

    current_state = current_state ^ duplicated_state;
  }

  // We now have a data `ptr` with at most 64 bytes and the current state
  // of the hashing state machine stored in current_state.
  while (len > 16) {
    uint64_t a = UnalignedLoad64(ptr);
    uint64_t b = UnalignedLoad64(ptr + 8);

    current_state = WyhashMix(a ^ salt[1], b ^ current_state);

    ptr += 16;
    len -= 16;
  }

  // We now have a data `ptr` with at most 16 bytes.
  uint64_t a = 0;
  uint64_t b = 0;
  if (len > 8) {
    // When we have at least 9 and at most 16 bytes, set A to the first 64
    // bits of the input and B to the last 64 bits of the input. Yes, they will
    // overlap in the middle if we are working with less than the full 16
    // bytes.
    a = UnalignedLoad64(ptr);
    b = UnalignedLoad64(ptr + len - 8);
  } else if (len > 3) {
    // If we have at least 4 and at most 8 bytes, set A to the first 32
    // bits and B to the last 32 bits.
    a = UnalignedLoad32(ptr);
    b = UnalignedLoad32(ptr + len - 4);
  } else if (len > 0) {
    // If we have at least 1 and at most 3 bytes, read all of the provided
    // bits into A, with some adjustments.
    a = ((ptr[0] << 16) | (ptr[len >> 1] << 8) | ptr[len - 1]);
    b = 0;
  } else {
    a = 0;
    b = 0;
  }

  uint64_t w = WyhashMix(a ^ salt[1], b ^ current_state);
  uint64_t z = salt[1] ^ starting_length;
  return WyhashMix(w, z);
}

const uint64_t kWyhashSalt[5] = {
    0x243F6A8885A308D3ULL, 0x13198A2E03707344ULL, 0xA4093822299F31D0ULL,
    0x082EFA98EC4E6C89ULL, 0x452821E638D01377ULL,
};

uint32_t _upb_Hash(const void* p, size_t n, uint64_t seed) {
  return Wyhash(p, n, seed, kWyhashSalt);
}

static uint32_t _upb_Hash_NoSeed(const char* p, size_t n) {
  return _upb_Hash(p, n, 0);
}

static uint32_t strhash(upb_tabkey key) {
  uint32_t len;
  char* str = upb_tabstr(key, &len);
  return _upb_Hash_NoSeed(str, len);
}

static bool streql(upb_tabkey k1, lookupkey_t k2) {
  uint32_t len;
  char* str = upb_tabstr(k1, &len);
  return len == k2.str.len && (len == 0 || memcmp(str, k2.str.str, len) == 0);
}

bool upb_strtable_init(upb_strtable* t, size_t expected_size, upb_Arena* a) {
  // Multiply by approximate reciprocal of MAX_LOAD (0.85), with pow2
  // denominator.
  size_t need_entries = (expected_size + 1) * 1204 / 1024;
  UPB_ASSERT(need_entries >= expected_size * 0.85);
  int size_lg2 = upb_Log2Ceiling(need_entries);
  return init(&t->t, size_lg2, a);
}

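/* Illustrative sizing (not normative): upb_strtable_init() with
 * expected_size == 10 computes need_entries == (10 + 1) * 1204 / 1024 == 12,
 * so size_lg2 == 4 and the hash part starts with 16 entries, keeping the table
 * comfortably below the 0.85 load factor for the expected number of keys. */
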
void upb_strtable_clear(upb_strtable* t) {
  size_t bytes = upb_table_size(&t->t) * sizeof(upb_tabent);
  t->t.count = 0;
  memset((char*)t->t.entries, 0, bytes);
}

bool upb_strtable_resize(upb_strtable* t, size_t size_lg2, upb_Arena* a) {
  upb_strtable new_table;
  if (!init(&new_table.t, size_lg2, a)) return false;

  intptr_t iter = UPB_STRTABLE_BEGIN;
  upb_StringView key;
  upb_value val;
  while (upb_strtable_next2(t, &key, &val, &iter)) {
    upb_strtable_insert(&new_table, key.data, key.size, val, a);
  }
  *t = new_table;
  return true;
}

bool upb_strtable_insert(upb_strtable* t, const char* k, size_t len,
                         upb_value v, upb_Arena* a) {
  lookupkey_t key;
  upb_tabkey tabkey;
  uint32_t hash;

  if (isfull(&t->t)) {
    /* Need to resize. New table of double the size, add old elements to it. */
    if (!upb_strtable_resize(t, t->t.size_lg2 + 1, a)) {
      return false;
    }
  }

  key = strkey2(k, len);
  tabkey = strcopy(key, a);
  if (tabkey == 0) return false;

  hash = _upb_Hash_NoSeed(key.str.str, key.str.len);
  insert(&t->t, key, tabkey, v, hash, &strhash, &streql);
  return true;
}

bool upb_strtable_lookup2(const upb_strtable* t, const char* key, size_t len,
                          upb_value* v) {
  uint32_t hash = _upb_Hash_NoSeed(key, len);
  return lookup(&t->t, strkey2(key, len), v, hash, &streql);
}

bool upb_strtable_remove2(upb_strtable* t, const char* key, size_t len,
                          upb_value* val) {
  uint32_t hash = _upb_Hash_NoSeed(key, len);
  upb_tabkey tabkey;
  return rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql);
}

/* Iteration */

void upb_strtable_begin(upb_strtable_iter* i, const upb_strtable* t) {
  i->t = t;
  i->index = begin(&t->t);
}

void upb_strtable_next(upb_strtable_iter* i) {
  i->index = next(&i->t->t, i->index);
}

bool upb_strtable_done(const upb_strtable_iter* i) {
  if (!i->t) return true;
  return i->index >= upb_table_size(&i->t->t) ||
         upb_tabent_isempty(str_tabent(i));
}

upb_StringView upb_strtable_iter_key(const upb_strtable_iter* i) {
  upb_StringView key;
  uint32_t len;
  UPB_ASSERT(!upb_strtable_done(i));
  key.data = upb_tabstr(str_tabent(i)->key, &len);
  key.size = len;
  return key;
}

upb_value upb_strtable_iter_value(const upb_strtable_iter* i) {
  UPB_ASSERT(!upb_strtable_done(i));
  return _upb_value_val(str_tabent(i)->val.val);
}

void upb_strtable_iter_setdone(upb_strtable_iter* i) {
  i->t = NULL;
  i->index = SIZE_MAX;
}

bool upb_strtable_iter_isequal(const upb_strtable_iter* i1,
                               const upb_strtable_iter* i2) {
  if (upb_strtable_done(i1) && upb_strtable_done(i2)) return true;
  return i1->t == i2->t && i1->index == i2->index;
}

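/* A minimal usage sketch of the iterator API above (assumes `table` is an
 * initialized, populated upb_strtable; for illustration only):
 *
 *   upb_strtable_iter it;
 *   upb_strtable_begin(&it, &table);
 *   for (; !upb_strtable_done(&it); upb_strtable_next(&it)) {
 *     upb_StringView key = upb_strtable_iter_key(&it);
 *     upb_value val = upb_strtable_iter_value(&it);
 *     // ... use key.data/key.size and val ...
 *   }
 */
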
/* upb_inttable ***************************************************************/

/* For inttables we use a hybrid structure where small keys are kept in an
 * array and large keys are put in the hash table. */

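/* Illustrative example (not normative): after upb_inttable_compact() below, a
 * table holding keys {1, 2, 3, 1000} would keep 1, 2 and 3 in the array part
 * and push 1000 into the hash part, since growing the array far enough to
 * cover key 1000 would leave its utilization well below MIN_DENSITY. */
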
static uint32_t inthash(upb_tabkey key) { return upb_inthash(key); }

static bool inteql(upb_tabkey k1, lookupkey_t k2) { return k1 == k2.num; }

static upb_tabval* mutable_array(upb_inttable* t) {
  return (upb_tabval*)t->array;
}

static upb_tabval* inttable_val(upb_inttable* t, uintptr_t key) {
  if (key < t->array_size) {
    return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL;
  } else {
    upb_tabent* e =
        findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql);
    return e ? &e->val : NULL;
  }
}

static const upb_tabval* inttable_val_const(const upb_inttable* t,
                                            uintptr_t key) {
  return inttable_val((upb_inttable*)t, key);
}

size_t upb_inttable_count(const upb_inttable* t) {
  return t->t.count + t->array_count;
}

static void check(upb_inttable* t) {
  UPB_UNUSED(t);
#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG)
  {
    // This check is very expensive (makes inserts/deletes O(N)).
    size_t count = 0;
    intptr_t iter = UPB_INTTABLE_BEGIN;
    uintptr_t key;
    upb_value val;
    while (upb_inttable_next(t, &key, &val, &iter)) {
      count++;  // Count entries so the assert below actually checks the total.
      UPB_ASSERT(upb_inttable_lookup(t, key, NULL));
    }
    UPB_ASSERT(count == upb_inttable_count(t));
  }
#endif
}

bool upb_inttable_sizedinit(upb_inttable* t, size_t asize, int hsize_lg2,
                            upb_Arena* a) {
  size_t array_bytes;

  if (!init(&t->t, hsize_lg2, a)) return false;
  /* Always make the array part at least 1 long, so that we know key 0
   * won't be in the hash part, which simplifies things. */
  t->array_size = UPB_MAX(1, asize);
  t->array_count = 0;
  array_bytes = t->array_size * sizeof(upb_value);
  t->array = upb_Arena_Malloc(a, array_bytes);
  if (!t->array) {
    return false;
  }
  memset(mutable_array(t), 0xff, array_bytes);
  check(t);
  return true;
}

bool upb_inttable_init(upb_inttable* t, upb_Arena* a) {
  return upb_inttable_sizedinit(t, 0, 4, a);
}

bool upb_inttable_insert(upb_inttable* t, uintptr_t key, upb_value val,
                         upb_Arena* a) {
  upb_tabval tabval;
  tabval.val = val.val;
  UPB_ASSERT(
      upb_arrhas(tabval)); /* This will reject (uint64_t)-1.  Fix this. */

  if (key < t->array_size) {
    UPB_ASSERT(!upb_arrhas(t->array[key]));
    t->array_count++;
    mutable_array(t)[key].val = val.val;
  } else {
    if (isfull(&t->t)) {
      /* Need to resize the hash part, but we re-use the array part. */
      size_t i;
      upb_table new_table;

      if (!init(&new_table, t->t.size_lg2 + 1, a)) {
        return false;
      }

      for (i = begin(&t->t); i < upb_table_size(&t->t); i = next(&t->t, i)) {
        const upb_tabent* e = &t->t.entries[i];
        uint32_t hash;
        upb_value v;

        _upb_value_setval(&v, e->val.val);
        hash = upb_inthash(e->key);
        insert(&new_table, intkey(e->key), e->key, v, hash, &inthash, &inteql);
      }

      UPB_ASSERT(t->t.count == new_table.count);

      t->t = new_table;
    }
    insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql);
  }
  check(t);
  return true;
}

bool upb_inttable_lookup(const upb_inttable* t, uintptr_t key, upb_value* v) {
  const upb_tabval* table_v = inttable_val_const(t, key);
  if (!table_v) return false;
  if (v) _upb_value_setval(v, table_v->val);
  return true;
}

bool upb_inttable_replace(upb_inttable* t, uintptr_t key, upb_value val) {
  upb_tabval* table_v = inttable_val(t, key);
  if (!table_v) return false;
  table_v->val = val.val;
  return true;
}

bool upb_inttable_remove(upb_inttable* t, uintptr_t key, upb_value* val) {
  bool success;
  if (key < t->array_size) {
    if (upb_arrhas(t->array[key])) {
      upb_tabval empty = UPB_TABVALUE_EMPTY_INIT;
      t->array_count--;
      if (val) {
        _upb_value_setval(val, t->array[key].val);
      }
      mutable_array(t)[key] = empty;
      success = true;
    } else {
      success = false;
    }
  } else {
    success = rm(&t->t, intkey(key), val, NULL, upb_inthash(key), &inteql);
  }
  check(t);
  return success;
}

void upb_inttable_compact(upb_inttable* t, upb_Arena* a) {
  /* A power-of-two histogram of the table keys. */
  size_t counts[UPB_MAXARRSIZE + 1] = {0};

  /* The max key in each bucket. */
  uintptr_t max[UPB_MAXARRSIZE + 1] = {0};

  {
    intptr_t iter = UPB_INTTABLE_BEGIN;
    uintptr_t key;
    upb_value val;
    while (upb_inttable_next(t, &key, &val, &iter)) {
      int bucket = log2ceil(key);
      max[bucket] = UPB_MAX(max[bucket], key);
      counts[bucket]++;
    }
  }

  /* Find the largest power of two that satisfies the MIN_DENSITY
   * definition (while actually having some keys). */
  size_t arr_count = upb_inttable_count(t);
  int size_lg2;
  upb_inttable new_t;

  for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 0; size_lg2--) {
    if (counts[size_lg2] == 0) {
      /* We can halve again without losing any entries. */
      continue;
    } else if (arr_count >= (1 << size_lg2) * MIN_DENSITY) {
      break;
    }

    arr_count -= counts[size_lg2];
  }

  UPB_ASSERT(arr_count <= upb_inttable_count(t));

  {
    /* Insert all elements into new, perfectly-sized table. */
    size_t arr_size = max[size_lg2] + 1; /* +1 so arr[max] will fit. */
    size_t hash_count = upb_inttable_count(t) - arr_count;
    size_t hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0;
    int hashsize_lg2 = log2ceil(hash_size);

    upb_inttable_sizedinit(&new_t, arr_size, hashsize_lg2, a);

    {
      intptr_t iter = UPB_INTTABLE_BEGIN;
      uintptr_t key;
      upb_value val;
      while (upb_inttable_next(t, &key, &val, &iter)) {
        upb_inttable_insert(&new_t, key, val, a);
      }
    }

    UPB_ASSERT(new_t.array_size == arr_size);
    UPB_ASSERT(new_t.t.size_lg2 == hashsize_lg2);
  }
  *t = new_t;
}

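/* Worked example for upb_inttable_compact() (illustrative numbers only): for
 * keys {1, 2, 3, 1000}, log2ceil() buckets them as counts[0..2] == {1, 1, 1}
 * and counts[10] == 1. Bucket 10 fails the density test (4 < 1024 * 0.1), so
 * its key is dropped from the array candidate set; bucket 2 passes
 * (3 >= 4 * 0.1), giving arr_size == max[2] + 1 == 4. The remaining key 1000
 * is rehashed into a small hash part sized for a single entry. */
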
// Iteration.

bool upb_inttable_next(const upb_inttable* t, uintptr_t* key, upb_value* val,
                       intptr_t* iter) {
  intptr_t i = *iter;
  if ((size_t)(i + 1) <= t->array_size) {
    while ((size_t)++i < t->array_size) {
      upb_tabval ent = t->array[i];
      if (upb_arrhas(ent)) {
        *key = i;
        *val = _upb_value_val(ent.val);
        *iter = i;
        return true;
      }
    }
    i--;  // Back up to exactly one position before the start of the table.
  }

  size_t tab_idx = next(&t->t, i - t->array_size);
  if (tab_idx < upb_table_size(&t->t)) {
    upb_tabent* ent = &t->t.entries[tab_idx];
    *key = ent->key;
    *val = _upb_value_val(ent->val.val);
    *iter = tab_idx + t->array_size;
    return true;
  }

  return false;
}

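/* Note on the iterator encoding used above (descriptive, not an API promise):
 * iterator values below array_size index the array part directly, while values
 * at or above array_size refer to hash-part slot (iter - array_size). A fresh
 * iteration starts from UPB_INTTABLE_BEGIN and walks the array part before
 * falling through to the hash part. */
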
void upb_inttable_removeiter(upb_inttable* t, intptr_t* iter) {
  intptr_t i = *iter;
  if ((size_t)i < t->array_size) {
    t->array_count--;
    mutable_array(t)[i].val = -1;
  } else {
    upb_tabent* ent = &t->t.entries[i - t->array_size];
    upb_tabent* prev = NULL;

    // Linear search, not great.
    upb_tabent* end = &t->t.entries[upb_table_size(&t->t)];
    for (upb_tabent* e = t->t.entries; e != end; e++) {
      if (e->next == ent) {
        prev = e;
        break;
      }
    }

    if (prev) {
      prev->next = ent->next;
    }

    t->t.count--;
    ent->key = 0;
    ent->next = NULL;
  }
}

bool upb_strtable_next2(const upb_strtable* t, upb_StringView* key,
                        upb_value* val, intptr_t* iter) {
  size_t tab_idx = next(&t->t, *iter);
  if (tab_idx < upb_table_size(&t->t)) {
    upb_tabent* ent = &t->t.entries[tab_idx];
    uint32_t len;
    key->data = upb_tabstr(ent->key, &len);
    key->size = len;
    *val = _upb_value_val(ent->val.val);
    *iter = tab_idx;
    return true;
  }

  return false;
}

void upb_strtable_removeiter(upb_strtable* t, intptr_t* iter) {
  intptr_t i = *iter;
  upb_tabent* ent = &t->t.entries[i];
  upb_tabent* prev = NULL;

  // Linear search, not great.
  upb_tabent* end = &t->t.entries[upb_table_size(&t->t)];
  for (upb_tabent* e = t->t.entries; e != end; e++) {
    if (e->next == ent) {
      prev = e;
      break;
    }
  }

  if (prev) {
    prev->next = ent->next;
  }

  t->t.count--;
  ent->key = 0;
  ent->next = NULL;
}

void upb_strtable_setentryvalue(upb_strtable* t, intptr_t iter, upb_value v) {
  upb_tabent* ent = &t->t.entries[iter];
  ent->val.val = v.val;
}