1 #include "Python.h"
2 
3 #include "pycore_bitutils.h"      // _Py_popcount32
4 #include "pycore_hamt.h"
5 #include "pycore_initconfig.h"    // _PyStatus_OK()
6 #include "pycore_object.h"        // _PyObject_GC_TRACK()
7 #include <stddef.h>               // offsetof()
8 
9 /*
10 This file provides an implementation of an immutable mapping using the
11 Hash Array Mapped Trie (or HAMT) datastructure.
12 
13 This design allows to have:
14 
15 1. Efficient copy: immutable mappings can be copied by reference,
16    making it an O(1) operation.
17 
18 2. Efficient mutations: due to structural sharing, only a portion of
19    the trie needs to be copied when the collection is mutated.  The
20    cost of set/delete operations is O(log N).
21 
22 3. Efficient lookups: O(log N).
23 
24 (where N is number of key/value items in the immutable mapping.)
25 
26 
27 HAMT
28 ====
29 
30 The core idea of HAMT is that the shape of the trie is encoded into the
31 hashes of keys.
32 
33 Say we want to store a K/V pair in our mapping.  First, we calculate the
34 hash of K, let's say it's 19830128, or in binary:
35 
36     0b1001011101001010101110000 = 19830128
37 
38 Now let's partition this bit representation of the hash into blocks of
39 5 bits each:
40 
41     0b00_00000_10010_11101_00101_01011_10000 = 19830128
42           (6)   (5)   (4)   (3)   (2)   (1)
43 
44 Each block of 5 bits represents a number between 0 and 31.  So if we have
45 a tree that consists of nodes, each of which is an array of 32 pointers,
46 those 5-bit blocks will encode a position on a single tree level.
47 
48 For example, storing the key K with hash 19830128, results in the following
49 tree structure:
50 
51                      (array of 32 pointers)
52                      +---+ -- +----+----+----+ -- +----+
53   root node          | 0 | .. | 15 | 16 | 17 | .. | 31 |   0b10000 = 16 (1)
54   (level 1)          +---+ -- +----+----+----+ -- +----+
55                                       |
56                      +---+ -- +----+----+----+ -- +----+
57   a 2nd level node   | 0 | .. | 10 | 11 | 12 | .. | 31 |   0b01011 = 11 (2)
58                      +---+ -- +----+----+----+ -- +----+
59                                       |
60                      +---+ -- +----+----+----+ -- +----+
61   a 3rd level node   | 0 | .. | 04 | 05 | 06 | .. | 31 |   0b00101 = 5  (3)
62                      +---+ -- +----+----+----+ -- +----+
63                                       |
64                      +---+ -- +----+----+----+----+
65   a 4th level node   | 0 | .. | 04 | 29 | 30 | 31 |        0b11101 = 29 (4)
66                      +---+ -- +----+----+----+----+
67                                       |
68                      +---+ -- +----+----+----+ -- +----+
69   a 5th level node   | 0 | .. | 17 | 18 | 19 | .. | 31 |   0b10010 = 18 (5)
70                      +---+ -- +----+----+----+ -- +----+
71                                       |
72                        +--------------+
73                        |
74                      +---+ -- +----+----+----+ -- +----+
75   a 6th level node   | 0 | .. | 15 | 16 | 17 | .. | 31 |   0b00000 = 0  (6)
76                      +---+ -- +----+----+----+ -- +----+
77                        |
78                        V -- our value (or collision)
79 
80 To rehash: for a K/V pair, the hash of K encodes where in the tree V will
81 be stored.
82 
83 To optimize memory footprint and handle hash collisions, our implementation
84 uses three different types of nodes:
85 
86  * A Bitmap node;
87  * An Array node;
88  * A Collision node.
89 
90 Because we implement an immutable dictionary, our nodes are also
91 immutable.  Therefore, when we need to modify a node, we copy it, and
92 do that modification to the copy.
93 
94 
95 Array Nodes
96 -----------
97 
98 These nodes are very simple.  Essentially they are arrays of 32 pointers
99 we used to illustrate the high-level idea in the previous section.
100 
101 We use Array nodes only when we need to store more than 16 pointers
102 in a single node.
103 
104 Array nodes do not store key objects or value objects.  They are used
105 only as an indirection level - their pointers point to other nodes in
106 the tree.
107 
108 
109 Bitmap Node
110 -----------
111 
112 Allocating a new 32-pointers array for every node of our tree would be
113 very expensive.  Unless we store millions of keys, most of tree nodes would
114 be very sparse.
115 
116 When we have less than 16 elements in a node, we don't want to use the
117 Array node, that would mean that we waste a lot of memory.  Instead,
118 we can use bitmap compression and can have just as many pointers
119 as we need!
120 
121 Bitmap nodes consist of two fields:
122 
123 1. An array of pointers.  If a Bitmap node holds N elements, the
124    array will be of N pointers.
125 
126 2. A 32bit integer -- a bitmap field.  If an N-th bit is set in the
127    bitmap, it means that the node has an N-th element.
128 
129 For example, say we need to store a 3 elements sparse array:
130 
131    +---+  --  +---+  --  +----+  --  +----+
132    | 0 |  ..  | 4 |  ..  | 11 |  ..  | 17 |
133    +---+  --  +---+  --  +----+  --  +----+
134                 |          |           |
135                 o1         o2          o3
136 
137 We allocate a three-pointer Bitmap node.  Its bitmap field will be
138 then set to:
139 
140    0b_00100_00010_00000_10000 == (1 << 17) | (1 << 11) | (1 << 4)
141 
142 To check if our Bitmap node has an I-th element we can do:
143 
144    bitmap & (1 << I)
145 
146 
147 And here's a formula to calculate a position in our pointer array
148 which would correspond to an I-th element:
149 
150    popcount(bitmap & ((1 << I) - 1))
151 
152 
153 Let's break it down:
154 
155  * `popcount` is a function that returns a number of bits set to 1;
156 
157  * `((1 << I) - 1)` is a mask to filter the bitmask to contain bits
158    set to the *right* of our bit.
159 
160 
161 So for our 17, 11, and 4 indexes:
162 
163  * bitmap & ((1 << 17) - 1) == 0b100000010000 => 2 bits are set => index is 2.
164 
165  * bitmap & ((1 << 11) - 1) == 0b10000 => 1 bit is set => index is 1.
166 
167  * bitmap & ((1 << 4) - 1) == 0b0 => 0 bits are set => index is 0.
168 
169 
170 To conclude: Bitmap nodes are just like Array nodes -- they can store
171 a number of pointers, but use bitmap compression to eliminate unused
172 pointers.
173 
174 
175 Bitmap nodes have two pointers for each item:
176 
177   +----+----+----+----+  --  +----+----+
178   | k1 | v1 | k2 | v2 |  ..  | kN | vN |
179   +----+----+----+----+  --  +----+----+
180 
181 When kI == NULL, vI points to another tree level.
182 
183 When kI != NULL, the actual key object is stored in kI, and its
184 value is stored in vI.
185 
186 
187 Collision Nodes
188 ---------------
189 
190 Collision nodes are simple arrays of pointers -- two pointers per
191 key/value.  When there's a hash collision, say for k1/v1 and k2/v2
192 we have `hash(k1)==hash(k2)`.  Then our collision node will be:
193 
194   +----+----+----+----+
195   | k1 | v1 | k2 | v2 |
196   +----+----+----+----+
197 
198 
199 Tree Structure
200 --------------
201 
202 All nodes are PyObjects.
203 
204 The `PyHamtObject` object has a pointer to the root node (h_root),
205 and has a length field (h_count).
206 
207 High-level functions accept a PyHamtObject object and dispatch to
208 lower-level functions depending on what kind of node h_root points to.
209 
210 
211 Operations
212 ==========
213 
214 There are three fundamental operations on an immutable dictionary:
215 
216 1. "o.assoc(k, v)" will return a new immutable dictionary, that will be
217    a copy of "o", but with the "k/v" item set.
218 
219    Functions in this file:
220 
221         hamt_node_assoc, hamt_node_bitmap_assoc,
222         hamt_node_array_assoc, hamt_node_collision_assoc
223 
224    `hamt_node_assoc` function accepts a node object, and calls
225    other functions depending on its actual type.
226 
227 2. "o.find(k)" will lookup key "k" in "o".
228 
229    Functions:
230 
231         hamt_node_find, hamt_node_bitmap_find,
232         hamt_node_array_find, hamt_node_collision_find
233 
234 3. "o.without(k)" will return a new immutable dictionary, that will be
235    a copy of "o", buth without the "k" key.
236 
237    Functions:
238 
239         hamt_node_without, hamt_node_bitmap_without,
240         hamt_node_array_without, hamt_node_collision_without
241 
242 
243 Further Reading
244 ===============
245 
246 1. http://blog.higher-order.net/2009/09/08/understanding-clojures-persistenthashmap-deftwice.html
247 
248 2. http://blog.higher-order.net/2010/08/16/assoc-and-clojures-persistenthashmap-part-ii.html
249 
250 3. Clojure's PersistentHashMap implementation:
251    https://github.com/clojure/clojure/blob/master/src/jvm/clojure/lang/PersistentHashMap.java
252 
253 
254 Debug
255 =====
256 
257 The HAMT datatype is accessible for testing purposes under the
258 `_testcapi` module:
259 
260     >>> from _testcapi import hamt
261     >>> h = hamt()
262     >>> h2 = h.set('a', 2)
263     >>> h3 = h2.set('b', 3)
264     >>> list(h3)
265     ['a', 'b']
266 
267 When CPython is built in debug mode, a '__dump__()' method is available
268 to introspect the tree:
269 
270     >>> print(h3.__dump__())
271     HAMT(len=2):
272         BitmapNode(size=4 count=2 bitmap=0b110 id=0x10eb9d9e8):
273             'a': 2
274             'b': 3
275 */
276 
277 
278 #define IS_ARRAY_NODE(node)     Py_IS_TYPE(node, &_PyHamt_ArrayNode_Type)
279 #define IS_BITMAP_NODE(node)    Py_IS_TYPE(node, &_PyHamt_BitmapNode_Type)
280 #define IS_COLLISION_NODE(node) Py_IS_TYPE(node, &_PyHamt_CollisionNode_Type)
281 
282 
283 /* Return type for 'find' (lookup a key) functions.
284 
285    * F_ERROR - an error occurred;
286    * F_NOT_FOUND - the key was not found;
287    * F_FOUND - the key was found.
288 */
289 typedef enum {F_ERROR, F_NOT_FOUND, F_FOUND} hamt_find_t;
290 
291 
292 /* Return type for 'without' (delete a key) functions.
293 
294    * W_ERROR - an error occurred;
295    * W_NOT_FOUND - the key was not found: there's nothing to delete;
296    * W_EMPTY - the key was found: the node/tree would be empty
297      if the key is deleted;
298    * W_NEWNODE - the key was found: a new node/tree is returned
299      without that key.
300 */
301 typedef enum {W_ERROR, W_NOT_FOUND, W_EMPTY, W_NEWNODE} hamt_without_t;
302 
303 
304 /* Low-level iterator protocol type.
305 
306    * I_ITEM - a new item has been yielded;
307    * I_END - the whole tree was visited (similar to StopIteration).
308 */
309 typedef enum {I_ITEM, I_END} hamt_iter_t;
310 
311 
312 #define HAMT_ARRAY_NODE_SIZE 32
313 
314 
315 typedef struct {
316     PyObject_HEAD
317     PyHamtNode *a_array[HAMT_ARRAY_NODE_SIZE];
318     Py_ssize_t a_count;
319 } PyHamtNode_Array;
320 
321 
322 typedef struct {
323     PyObject_VAR_HEAD
324     uint32_t b_bitmap;
325     PyObject *b_array[1];
326 } PyHamtNode_Bitmap;
327 
328 
329 typedef struct {
330     PyObject_VAR_HEAD
331     int32_t c_hash;
332     PyObject *c_array[1];
333 } PyHamtNode_Collision;
334 
335 
336 static PyHamtNode_Bitmap *_empty_bitmap_node;
337 static PyHamtObject *_empty_hamt;
338 
339 
340 static PyHamtObject *
341 hamt_alloc(void);
342 
343 static PyHamtNode *
344 hamt_node_assoc(PyHamtNode *node,
345                 uint32_t shift, int32_t hash,
346                 PyObject *key, PyObject *val, int* added_leaf);
347 
348 static hamt_without_t
349 hamt_node_without(PyHamtNode *node,
350                   uint32_t shift, int32_t hash,
351                   PyObject *key,
352                   PyHamtNode **new_node);
353 
354 static hamt_find_t
355 hamt_node_find(PyHamtNode *node,
356                uint32_t shift, int32_t hash,
357                PyObject *key, PyObject **val);
358 
359 #ifdef Py_DEBUG
360 static int
361 hamt_node_dump(PyHamtNode *node,
362                _PyUnicodeWriter *writer, int level);
363 #endif
364 
365 static PyHamtNode *
366 hamt_node_array_new(Py_ssize_t);
367 
368 static PyHamtNode *
369 hamt_node_collision_new(int32_t hash, Py_ssize_t size);
370 
371 static inline Py_ssize_t
372 hamt_node_collision_count(PyHamtNode_Collision *node);
373 
374 
375 #ifdef Py_DEBUG
376 static void
_hamt_node_array_validate(void * obj_raw)377 _hamt_node_array_validate(void *obj_raw)
378 {
379     PyObject *obj = _PyObject_CAST(obj_raw);
380     assert(IS_ARRAY_NODE(obj));
381     PyHamtNode_Array *node = (PyHamtNode_Array*)obj;
382     Py_ssize_t i = 0, count = 0;
383     for (; i < HAMT_ARRAY_NODE_SIZE; i++) {
384         if (node->a_array[i] != NULL) {
385             count++;
386         }
387     }
388     assert(count == node->a_count);
389 }
390 
391 #define VALIDATE_ARRAY_NODE(NODE) \
392     do { _hamt_node_array_validate(NODE); } while (0);
393 #else
394 #define VALIDATE_ARRAY_NODE(NODE)
395 #endif
396 
397 
398 /* Returns -1 on error */
399 static inline int32_t
hamt_hash(PyObject * o)400 hamt_hash(PyObject *o)
401 {
402     Py_hash_t hash = PyObject_Hash(o);
403 
404 #if SIZEOF_PY_HASH_T <= 4
405     return hash;
406 #else
407     if (hash == -1) {
408         /* exception */
409         return -1;
410     }
411 
412     /* While it's somewhat suboptimal to reduce Python's 64 bit hash to
413        32 bits via XOR, it seems that the resulting hash function
414        is good enough (this is also how Long type is hashed in Java.)
415        Storing 10, 100, 1000 Python strings results in a relatively
416        shallow and uniform tree structure.
417 
418        Also it's worth noting that it would be possible to adapt the tree
419        structure to 64 bit hashes, but that would increase memory pressure
420        and provide little to no performance benefits for collections with
421        fewer than billions of key/value pairs.
422 
423        Important: do not change this hash reducing function. There are many
424        tests that need an exact tree shape to cover all code paths and
425        we do that by specifying concrete values for test data's `__hash__`.
426        If this function is changed most of the regression tests would
427        become useless.
428     */
429     int32_t xored = (int32_t)(hash & 0xffffffffl) ^ (int32_t)(hash >> 32);
430     return xored == -1 ? -2 : xored;
431 #endif
432 }
433 
434 static inline uint32_t
hamt_mask(int32_t hash,uint32_t shift)435 hamt_mask(int32_t hash, uint32_t shift)
436 {
437     return (((uint32_t)hash >> shift) & 0x01f);
438 }
439 
440 static inline uint32_t
hamt_bitpos(int32_t hash,uint32_t shift)441 hamt_bitpos(int32_t hash, uint32_t shift)
442 {
443     return (uint32_t)1 << hamt_mask(hash, shift);
444 }
445 
446 static inline uint32_t
hamt_bitindex(uint32_t bitmap,uint32_t bit)447 hamt_bitindex(uint32_t bitmap, uint32_t bit)
448 {
449     return (uint32_t)_Py_popcount32(bitmap & (bit - 1));
450 }
451 
452 
453 /////////////////////////////////// Dump Helpers
454 #ifdef Py_DEBUG
455 
456 static int
_hamt_dump_ident(_PyUnicodeWriter * writer,int level)457 _hamt_dump_ident(_PyUnicodeWriter *writer, int level)
458 {
459     /* Write `'    ' * level` to the `writer` */
460     PyObject *str = NULL;
461     PyObject *num = NULL;
462     PyObject *res = NULL;
463     int ret = -1;
464 
465     str = PyUnicode_FromString("    ");
466     if (str == NULL) {
467         goto error;
468     }
469 
470     num = PyLong_FromLong((long)level);
471     if (num == NULL) {
472         goto error;
473     }
474 
475     res = PyNumber_Multiply(str, num);
476     if (res == NULL) {
477         goto error;
478     }
479 
480     ret = _PyUnicodeWriter_WriteStr(writer, res);
481 
482 error:
483     Py_XDECREF(res);
484     Py_XDECREF(str);
485     Py_XDECREF(num);
486     return ret;
487 }
488 
489 static int
_hamt_dump_format(_PyUnicodeWriter * writer,const char * format,...)490 _hamt_dump_format(_PyUnicodeWriter *writer, const char *format, ...)
491 {
492     /* A convenient helper combining _PyUnicodeWriter_WriteStr and
493        PyUnicode_FromFormatV.
494     */
495     PyObject* msg;
496     int ret;
497 
498     va_list vargs;
499 #ifdef HAVE_STDARG_PROTOTYPES
500     va_start(vargs, format);
501 #else
502     va_start(vargs);
503 #endif
504     msg = PyUnicode_FromFormatV(format, vargs);
505     va_end(vargs);
506 
507     if (msg == NULL) {
508         return -1;
509     }
510 
511     ret = _PyUnicodeWriter_WriteStr(writer, msg);
512     Py_DECREF(msg);
513     return ret;
514 }
515 
516 #endif  /* Py_DEBUG */
517 /////////////////////////////////// Bitmap Node
518 
519 
520 static PyHamtNode *
hamt_node_bitmap_new(Py_ssize_t size)521 hamt_node_bitmap_new(Py_ssize_t size)
522 {
523     /* Create a new bitmap node of size 'size' */
524 
525     PyHamtNode_Bitmap *node;
526     Py_ssize_t i;
527 
528     assert(size >= 0);
529     assert(size % 2 == 0);
530 
531     if (size == 0 && _empty_bitmap_node != NULL) {
532         Py_INCREF(_empty_bitmap_node);
533         return (PyHamtNode *)_empty_bitmap_node;
534     }
535 
536     /* No freelist; allocate a new bitmap node */
537     node = PyObject_GC_NewVar(
538         PyHamtNode_Bitmap, &_PyHamt_BitmapNode_Type, size);
539     if (node == NULL) {
540         return NULL;
541     }
542 
543     Py_SET_SIZE(node, size);
544 
545     for (i = 0; i < size; i++) {
546         node->b_array[i] = NULL;
547     }
548 
549     node->b_bitmap = 0;
550 
551     _PyObject_GC_TRACK(node);
552 
553     if (size == 0 && _empty_bitmap_node == NULL) {
554         /* Since bitmap nodes are immutable, we can cache the instance
555            for size=0 and reuse it whenever we need an empty bitmap node.
556         */
557         _empty_bitmap_node = node;
558         Py_INCREF(_empty_bitmap_node);
559     }
560 
561     return (PyHamtNode *)node;
562 }
563 
564 static inline Py_ssize_t
hamt_node_bitmap_count(PyHamtNode_Bitmap * node)565 hamt_node_bitmap_count(PyHamtNode_Bitmap *node)
566 {
567     return Py_SIZE(node) / 2;
568 }
569 
570 static PyHamtNode_Bitmap *
hamt_node_bitmap_clone(PyHamtNode_Bitmap * node)571 hamt_node_bitmap_clone(PyHamtNode_Bitmap *node)
572 {
573     /* Clone a bitmap node; return a new one with the same child notes. */
574 
575     PyHamtNode_Bitmap *clone;
576     Py_ssize_t i;
577 
578     clone = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(Py_SIZE(node));
579     if (clone == NULL) {
580         return NULL;
581     }
582 
583     for (i = 0; i < Py_SIZE(node); i++) {
584         Py_XINCREF(node->b_array[i]);
585         clone->b_array[i] = node->b_array[i];
586     }
587 
588     clone->b_bitmap = node->b_bitmap;
589     return clone;
590 }
591 
592 static PyHamtNode_Bitmap *
hamt_node_bitmap_clone_without(PyHamtNode_Bitmap * o,uint32_t bit)593 hamt_node_bitmap_clone_without(PyHamtNode_Bitmap *o, uint32_t bit)
594 {
595     assert(bit & o->b_bitmap);
596     assert(hamt_node_bitmap_count(o) > 1);
597 
598     PyHamtNode_Bitmap *new = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(
599         Py_SIZE(o) - 2);
600     if (new == NULL) {
601         return NULL;
602     }
603 
604     uint32_t idx = hamt_bitindex(o->b_bitmap, bit);
605     uint32_t key_idx = 2 * idx;
606     uint32_t val_idx = key_idx + 1;
607     uint32_t i;
608 
609     for (i = 0; i < key_idx; i++) {
610         Py_XINCREF(o->b_array[i]);
611         new->b_array[i] = o->b_array[i];
612     }
613 
614     assert(Py_SIZE(o) >= 0 && Py_SIZE(o) <= 32);
615     for (i = val_idx + 1; i < (uint32_t)Py_SIZE(o); i++) {
616         Py_XINCREF(o->b_array[i]);
617         new->b_array[i - 2] = o->b_array[i];
618     }
619 
620     new->b_bitmap = o->b_bitmap & ~bit;
621     return new;
622 }
623 
624 static PyHamtNode *
hamt_node_new_bitmap_or_collision(uint32_t shift,PyObject * key1,PyObject * val1,int32_t key2_hash,PyObject * key2,PyObject * val2)625 hamt_node_new_bitmap_or_collision(uint32_t shift,
626                                   PyObject *key1, PyObject *val1,
627                                   int32_t key2_hash,
628                                   PyObject *key2, PyObject *val2)
629 {
630     /* Helper method.  Creates a new node for key1/val and key2/val2
631        pairs.
632 
633        If key1 hash is equal to the hash of key2, a Collision node
634        will be created.  If they are not equal, a Bitmap node is
635        created.
636     */
637 
638     int32_t key1_hash = hamt_hash(key1);
639     if (key1_hash == -1) {
640         return NULL;
641     }
642 
643     if (key1_hash == key2_hash) {
644         PyHamtNode_Collision *n;
645         n = (PyHamtNode_Collision *)hamt_node_collision_new(key1_hash, 4);
646         if (n == NULL) {
647             return NULL;
648         }
649 
650         Py_INCREF(key1);
651         n->c_array[0] = key1;
652         Py_INCREF(val1);
653         n->c_array[1] = val1;
654 
655         Py_INCREF(key2);
656         n->c_array[2] = key2;
657         Py_INCREF(val2);
658         n->c_array[3] = val2;
659 
660         return (PyHamtNode *)n;
661     }
662     else {
663         int added_leaf = 0;
664         PyHamtNode *n = hamt_node_bitmap_new(0);
665         if (n == NULL) {
666             return NULL;
667         }
668 
669         PyHamtNode *n2 = hamt_node_assoc(
670             n, shift, key1_hash, key1, val1, &added_leaf);
671         Py_DECREF(n);
672         if (n2 == NULL) {
673             return NULL;
674         }
675 
676         n = hamt_node_assoc(n2, shift, key2_hash, key2, val2, &added_leaf);
677         Py_DECREF(n2);
678         if (n == NULL) {
679             return NULL;
680         }
681 
682         return n;
683     }
684 }
685 
686 static PyHamtNode *
hamt_node_bitmap_assoc(PyHamtNode_Bitmap * self,uint32_t shift,int32_t hash,PyObject * key,PyObject * val,int * added_leaf)687 hamt_node_bitmap_assoc(PyHamtNode_Bitmap *self,
688                        uint32_t shift, int32_t hash,
689                        PyObject *key, PyObject *val, int* added_leaf)
690 {
691     /* assoc operation for bitmap nodes.
692 
693        Return: a new node, or self if key/val already is in the
694        collection.
695 
696        'added_leaf' is later used in '_PyHamt_Assoc' to determine if
697        `hamt.set(key, val)` increased the size of the collection.
698     */
699 
700     uint32_t bit = hamt_bitpos(hash, shift);
701     uint32_t idx = hamt_bitindex(self->b_bitmap, bit);
702 
703     /* Bitmap node layout:
704 
705     +------+------+------+------+  ---  +------+------+
706     | key1 | val1 | key2 | val2 |  ...  | keyN | valN |
707     +------+------+------+------+  ---  +------+------+
708     where `N < Py_SIZE(node)`.
709 
710     The `node->b_bitmap` field is a bitmap.  For a given
711     `(shift, hash)` pair we can determine:
712 
713      - If this node has the corresponding key/val slots.
714      - The index of key/val slots.
715     */
716 
717     if (self->b_bitmap & bit) {
718         /* The key is set in this node */
719 
720         uint32_t key_idx = 2 * idx;
721         uint32_t val_idx = key_idx + 1;
722 
723         assert(val_idx < (size_t)Py_SIZE(self));
724 
725         PyObject *key_or_null = self->b_array[key_idx];
726         PyObject *val_or_node = self->b_array[val_idx];
727 
728         if (key_or_null == NULL) {
729             /* key is NULL.  This means that we have a few keys
730                that have the same (hash, shift) pair. */
731 
732             assert(val_or_node != NULL);
733 
734             PyHamtNode *sub_node = hamt_node_assoc(
735                 (PyHamtNode *)val_or_node,
736                 shift + 5, hash, key, val, added_leaf);
737             if (sub_node == NULL) {
738                 return NULL;
739             }
740 
741             if (val_or_node == (PyObject *)sub_node) {
742                 Py_DECREF(sub_node);
743                 Py_INCREF(self);
744                 return (PyHamtNode *)self;
745             }
746 
747             PyHamtNode_Bitmap *ret = hamt_node_bitmap_clone(self);
748             if (ret == NULL) {
749                 return NULL;
750             }
751             Py_SETREF(ret->b_array[val_idx], (PyObject*)sub_node);
752             return (PyHamtNode *)ret;
753         }
754 
755         assert(key != NULL);
756         /* key is not NULL.  This means that we have only one other
757            key in this collection that matches our hash for this shift. */
758 
759         int comp_err = PyObject_RichCompareBool(key, key_or_null, Py_EQ);
760         if (comp_err < 0) {  /* exception in __eq__ */
761             return NULL;
762         }
763         if (comp_err == 1) {  /* key == key_or_null */
764             if (val == val_or_node) {
765                 /* we already have the same key/val pair; return self. */
766                 Py_INCREF(self);
767                 return (PyHamtNode *)self;
768             }
769 
770             /* We're setting a new value for the key we had before.
771                Make a new bitmap node with a replaced value, and return it. */
772             PyHamtNode_Bitmap *ret = hamt_node_bitmap_clone(self);
773             if (ret == NULL) {
774                 return NULL;
775             }
776             Py_INCREF(val);
777             Py_SETREF(ret->b_array[val_idx], val);
778             return (PyHamtNode *)ret;
779         }
780 
781         /* It's a new key, and it has the same index as *one* another key.
782            We have a collision.  We need to create a new node which will
783            combine the existing key and the key we're adding.
784 
785            `hamt_node_new_bitmap_or_collision` will either create a new
786            Collision node if the keys have identical hashes, or
787            a new Bitmap node.
788         */
789         PyHamtNode *sub_node = hamt_node_new_bitmap_or_collision(
790             shift + 5,
791             key_or_null, val_or_node,  /* existing key/val */
792             hash,
793             key, val  /* new key/val */
794         );
795         if (sub_node == NULL) {
796             return NULL;
797         }
798 
799         PyHamtNode_Bitmap *ret = hamt_node_bitmap_clone(self);
800         if (ret == NULL) {
801             Py_DECREF(sub_node);
802             return NULL;
803         }
804         Py_SETREF(ret->b_array[key_idx], NULL);
805         Py_SETREF(ret->b_array[val_idx], (PyObject *)sub_node);
806 
807         *added_leaf = 1;
808         return (PyHamtNode *)ret;
809     }
810     else {
811         /* There was no key before with the same (shift,hash). */
812 
813         uint32_t n = (uint32_t)_Py_popcount32(self->b_bitmap);
814 
815         if (n >= 16) {
816             /* When we have a situation where we want to store more
817                than 16 nodes at one level of the tree, we no longer
818                want to use the Bitmap node with bitmap encoding.
819 
820                Instead we start using an Array node, which has
821                simpler (faster) implementation at the expense of
822                having preallocated 32 pointers for its keys/values
823                pairs.
824 
825                Small hamt objects (<30 keys) usually don't have any
826                Array nodes at all.  Between ~30 and ~400 keys hamt
827                objects usually have one Array node, and usually it's
828                a root node.
829             */
830 
831             uint32_t jdx = hamt_mask(hash, shift);
832             /* 'jdx' is the index of where the new key should be added
833                in the new Array node we're about to create. */
834 
835             PyHamtNode *empty = NULL;
836             PyHamtNode_Array *new_node = NULL;
837             PyHamtNode *res = NULL;
838 
839             /* Create a new Array node. */
840             new_node = (PyHamtNode_Array *)hamt_node_array_new(n + 1);
841             if (new_node == NULL) {
842                 goto fin;
843             }
844 
845             /* Create an empty bitmap node for the next
846                hamt_node_assoc call. */
847             empty = hamt_node_bitmap_new(0);
848             if (empty == NULL) {
849                 goto fin;
850             }
851 
852             /* Make a new bitmap node for the key/val we're adding.
853                Set that bitmap node to new-array-node[jdx]. */
854             new_node->a_array[jdx] = hamt_node_assoc(
855                 empty, shift + 5, hash, key, val, added_leaf);
856             if (new_node->a_array[jdx] == NULL) {
857                 goto fin;
858             }
859 
860             /* Copy existing key/value pairs from the current Bitmap
861                node to the new Array node we've just created. */
862             Py_ssize_t i, j;
863             for (i = 0, j = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
864                 if (((self->b_bitmap >> i) & 1) != 0) {
865                     /* Ensure we don't accidentally override `jdx` element
866                        we set few lines above.
867                     */
868                     assert(new_node->a_array[i] == NULL);
869 
870                     if (self->b_array[j] == NULL) {
871                         new_node->a_array[i] =
872                             (PyHamtNode *)self->b_array[j + 1];
873                         Py_INCREF(new_node->a_array[i]);
874                     }
875                     else {
876                         int32_t rehash = hamt_hash(self->b_array[j]);
877                         if (rehash == -1) {
878                             goto fin;
879                         }
880 
881                         new_node->a_array[i] = hamt_node_assoc(
882                             empty, shift + 5,
883                             rehash,
884                             self->b_array[j],
885                             self->b_array[j + 1],
886                             added_leaf);
887 
888                         if (new_node->a_array[i] == NULL) {
889                             goto fin;
890                         }
891                     }
892                     j += 2;
893                 }
894             }
895 
896             VALIDATE_ARRAY_NODE(new_node)
897 
898             /* That's it! */
899             res = (PyHamtNode *)new_node;
900 
901         fin:
902             Py_XDECREF(empty);
903             if (res == NULL) {
904                 Py_XDECREF(new_node);
905             }
906             return res;
907         }
908         else {
909             /* We have less than 16 keys at this level; let's just
910                create a new bitmap node out of this node with the
911                new key/val pair added. */
912 
913             uint32_t key_idx = 2 * idx;
914             uint32_t val_idx = key_idx + 1;
915             uint32_t i;
916 
917             *added_leaf = 1;
918 
919             /* Allocate new Bitmap node which can have one more key/val
920                pair in addition to what we have already. */
921             PyHamtNode_Bitmap *new_node =
922                 (PyHamtNode_Bitmap *)hamt_node_bitmap_new(2 * (n + 1));
923             if (new_node == NULL) {
924                 return NULL;
925             }
926 
927             /* Copy all keys/values that will be before the new key/value
928                we are adding. */
929             for (i = 0; i < key_idx; i++) {
930                 Py_XINCREF(self->b_array[i]);
931                 new_node->b_array[i] = self->b_array[i];
932             }
933 
934             /* Set the new key/value to the new Bitmap node. */
935             Py_INCREF(key);
936             new_node->b_array[key_idx] = key;
937             Py_INCREF(val);
938             new_node->b_array[val_idx] = val;
939 
940             /* Copy all keys/values that will be after the new key/value
941                we are adding. */
942             assert(Py_SIZE(self) >= 0 && Py_SIZE(self) <= 32);
943             for (i = key_idx; i < (uint32_t)Py_SIZE(self); i++) {
944                 Py_XINCREF(self->b_array[i]);
945                 new_node->b_array[i + 2] = self->b_array[i];
946             }
947 
948             new_node->b_bitmap = self->b_bitmap | bit;
949             return (PyHamtNode *)new_node;
950         }
951     }
952 }
953 
954 static hamt_without_t
hamt_node_bitmap_without(PyHamtNode_Bitmap * self,uint32_t shift,int32_t hash,PyObject * key,PyHamtNode ** new_node)955 hamt_node_bitmap_without(PyHamtNode_Bitmap *self,
956                          uint32_t shift, int32_t hash,
957                          PyObject *key,
958                          PyHamtNode **new_node)
959 {
960     uint32_t bit = hamt_bitpos(hash, shift);
961     if ((self->b_bitmap & bit) == 0) {
962         return W_NOT_FOUND;
963     }
964 
965     uint32_t idx = hamt_bitindex(self->b_bitmap, bit);
966 
967     uint32_t key_idx = 2 * idx;
968     uint32_t val_idx = key_idx + 1;
969 
970     PyObject *key_or_null = self->b_array[key_idx];
971     PyObject *val_or_node = self->b_array[val_idx];
972 
973     if (key_or_null == NULL) {
974         /* key == NULL means that 'value' is another tree node. */
975 
976         PyHamtNode *sub_node = NULL;
977 
978         hamt_without_t res = hamt_node_without(
979             (PyHamtNode *)val_or_node,
980             shift + 5, hash, key, &sub_node);
981 
982         switch (res) {
983             case W_EMPTY:
984                 /* It's impossible for us to receive a W_EMPTY here:
985 
986                     - Array nodes are converted to Bitmap nodes when
987                       we delete 16th item from them;
988 
989                     - Collision nodes are converted to Bitmap when
990                       there is one item in them;
991 
992                     - Bitmap node's without() inlines single-item
993                       sub-nodes.
994 
995                    So in no situation we can have a single-item
996                    Bitmap child of another Bitmap node.
997                 */
998                 Py_UNREACHABLE();
999 
1000             case W_NEWNODE: {
1001                 assert(sub_node != NULL);
1002 
1003                 if (IS_BITMAP_NODE(sub_node)) {
1004                     PyHamtNode_Bitmap *sub_tree = (PyHamtNode_Bitmap *)sub_node;
1005                     if (hamt_node_bitmap_count(sub_tree) == 1 &&
1006                             sub_tree->b_array[0] != NULL)
1007                     {
1008                         /* A bitmap node with one key/value pair.  Just
1009                            merge it into this node.
1010 
1011                            Note that we don't inline Bitmap nodes that
1012                            have a NULL key -- those nodes point to another
1013                            tree level, and we cannot simply move tree levels
1014                            up or down.
1015                         */
1016 
1017                         PyHamtNode_Bitmap *clone = hamt_node_bitmap_clone(self);
1018                         if (clone == NULL) {
1019                             Py_DECREF(sub_node);
1020                             return W_ERROR;
1021                         }
1022 
1023                         PyObject *key = sub_tree->b_array[0];
1024                         PyObject *val = sub_tree->b_array[1];
1025 
1026                         Py_INCREF(key);
1027                         Py_XSETREF(clone->b_array[key_idx], key);
1028                         Py_INCREF(val);
1029                         Py_SETREF(clone->b_array[val_idx], val);
1030 
1031                         Py_DECREF(sub_tree);
1032 
1033                         *new_node = (PyHamtNode *)clone;
1034                         return W_NEWNODE;
1035                     }
1036                 }
1037 
1038 #ifdef Py_DEBUG
1039                 /* Ensure that Collision.without implementation
1040                    converts to Bitmap nodes itself.
1041                 */
1042                 if (IS_COLLISION_NODE(sub_node)) {
1043                     assert(hamt_node_collision_count(
1044                             (PyHamtNode_Collision*)sub_node) > 1);
1045                 }
1046 #endif
1047 
1048                 PyHamtNode_Bitmap *clone = hamt_node_bitmap_clone(self);
1049                 if (clone == NULL) {
1050                     return W_ERROR;
1051                 }
1052 
1053                 Py_SETREF(clone->b_array[val_idx],
1054                           (PyObject *)sub_node);  /* borrow */
1055 
1056                 *new_node = (PyHamtNode *)clone;
1057                 return W_NEWNODE;
1058             }
1059 
1060             case W_ERROR:
1061             case W_NOT_FOUND:
1062                 assert(sub_node == NULL);
1063                 return res;
1064 
1065             default:
1066                 Py_UNREACHABLE();
1067         }
1068     }
1069     else {
1070         /* We have a regular key/value pair */
1071 
1072         int cmp = PyObject_RichCompareBool(key_or_null, key, Py_EQ);
1073         if (cmp < 0) {
1074             return W_ERROR;
1075         }
1076         if (cmp == 0) {
1077             return W_NOT_FOUND;
1078         }
1079 
1080         if (hamt_node_bitmap_count(self) == 1) {
1081             return W_EMPTY;
1082         }
1083 
1084         *new_node = (PyHamtNode *)
1085             hamt_node_bitmap_clone_without(self, bit);
1086         if (*new_node == NULL) {
1087             return W_ERROR;
1088         }
1089 
1090         return W_NEWNODE;
1091     }
1092 }
1093 
1094 static hamt_find_t
hamt_node_bitmap_find(PyHamtNode_Bitmap * self,uint32_t shift,int32_t hash,PyObject * key,PyObject ** val)1095 hamt_node_bitmap_find(PyHamtNode_Bitmap *self,
1096                       uint32_t shift, int32_t hash,
1097                       PyObject *key, PyObject **val)
1098 {
1099     /* Lookup a key in a Bitmap node. */
1100 
1101     uint32_t bit = hamt_bitpos(hash, shift);
1102     uint32_t idx;
1103     uint32_t key_idx;
1104     uint32_t val_idx;
1105     PyObject *key_or_null;
1106     PyObject *val_or_node;
1107     int comp_err;
1108 
1109     if ((self->b_bitmap & bit) == 0) {
1110         return F_NOT_FOUND;
1111     }
1112 
1113     idx = hamt_bitindex(self->b_bitmap, bit);
1114     key_idx = idx * 2;
1115     val_idx = key_idx + 1;
1116 
1117     assert(val_idx < (size_t)Py_SIZE(self));
1118 
1119     key_or_null = self->b_array[key_idx];
1120     val_or_node = self->b_array[val_idx];
1121 
1122     if (key_or_null == NULL) {
1123         /* There are a few keys that have the same hash at the current shift
1124            that match our key.  Dispatch the lookup further down the tree. */
1125         assert(val_or_node != NULL);
1126         return hamt_node_find((PyHamtNode *)val_or_node,
1127                               shift + 5, hash, key, val);
1128     }
1129 
1130     /* We have only one key -- a potential match.  Let's compare if the
1131        key we are looking at is equal to the key we are looking for. */
1132     assert(key != NULL);
1133     comp_err = PyObject_RichCompareBool(key, key_or_null, Py_EQ);
1134     if (comp_err < 0) {  /* exception in __eq__ */
1135         return F_ERROR;
1136     }
1137     if (comp_err == 1) {  /* key == key_or_null */
1138         *val = val_or_node;
1139         return F_FOUND;
1140     }
1141 
1142     return F_NOT_FOUND;
1143 }
1144 
1145 static int
hamt_node_bitmap_traverse(PyHamtNode_Bitmap * self,visitproc visit,void * arg)1146 hamt_node_bitmap_traverse(PyHamtNode_Bitmap *self, visitproc visit, void *arg)
1147 {
1148     /* Bitmap's tp_traverse */
1149 
1150     Py_ssize_t i;
1151 
1152     for (i = Py_SIZE(self); --i >= 0; ) {
1153         Py_VISIT(self->b_array[i]);
1154     }
1155 
1156     return 0;
1157 }
1158 
1159 static void
hamt_node_bitmap_dealloc(PyHamtNode_Bitmap * self)1160 hamt_node_bitmap_dealloc(PyHamtNode_Bitmap *self)
1161 {
1162     /* Bitmap's tp_dealloc */
1163 
1164     Py_ssize_t len = Py_SIZE(self);
1165     Py_ssize_t i;
1166 
1167     PyObject_GC_UnTrack(self);
1168     Py_TRASHCAN_BEGIN(self, hamt_node_bitmap_dealloc)
1169 
1170     if (len > 0) {
1171         i = len;
1172         while (--i >= 0) {
1173             Py_XDECREF(self->b_array[i]);
1174         }
1175     }
1176 
1177     Py_TYPE(self)->tp_free((PyObject *)self);
1178     Py_TRASHCAN_END
1179 }
1180 
1181 #ifdef Py_DEBUG
1182 static int
hamt_node_bitmap_dump(PyHamtNode_Bitmap * node,_PyUnicodeWriter * writer,int level)1183 hamt_node_bitmap_dump(PyHamtNode_Bitmap *node,
1184                       _PyUnicodeWriter *writer, int level)
1185 {
1186     /* Debug build: __dump__() method implementation for Bitmap nodes. */
1187 
1188     Py_ssize_t i;
1189     PyObject *tmp1;
1190     PyObject *tmp2;
1191 
1192     if (_hamt_dump_ident(writer, level + 1)) {
1193         goto error;
1194     }
1195 
1196     if (_hamt_dump_format(writer, "BitmapNode(size=%zd count=%zd ",
1197                           Py_SIZE(node), Py_SIZE(node) / 2))
1198     {
1199         goto error;
1200     }
1201 
1202     tmp1 = PyLong_FromUnsignedLong(node->b_bitmap);
1203     if (tmp1 == NULL) {
1204         goto error;
1205     }
1206     tmp2 = _PyLong_Format(tmp1, 2);
1207     Py_DECREF(tmp1);
1208     if (tmp2 == NULL) {
1209         goto error;
1210     }
1211     if (_hamt_dump_format(writer, "bitmap=%S id=%p):\n", tmp2, node)) {
1212         Py_DECREF(tmp2);
1213         goto error;
1214     }
1215     Py_DECREF(tmp2);
1216 
1217     for (i = 0; i < Py_SIZE(node); i += 2) {
1218         PyObject *key_or_null = node->b_array[i];
1219         PyObject *val_or_node = node->b_array[i + 1];
1220 
1221         if (_hamt_dump_ident(writer, level + 2)) {
1222             goto error;
1223         }
1224 
1225         if (key_or_null == NULL) {
1226             if (_hamt_dump_format(writer, "NULL:\n")) {
1227                 goto error;
1228             }
1229 
1230             if (hamt_node_dump((PyHamtNode *)val_or_node,
1231                                writer, level + 2))
1232             {
1233                 goto error;
1234             }
1235         }
1236         else {
1237             if (_hamt_dump_format(writer, "%R: %R", key_or_null,
1238                                   val_or_node))
1239             {
1240                 goto error;
1241             }
1242         }
1243 
1244         if (_hamt_dump_format(writer, "\n")) {
1245             goto error;
1246         }
1247     }
1248 
1249     return 0;
1250 error:
1251     return -1;
1252 }
1253 #endif  /* Py_DEBUG */
1254 
1255 
1256 /////////////////////////////////// Collision Node
1257 
1258 
1259 static PyHamtNode *
hamt_node_collision_new(int32_t hash,Py_ssize_t size)1260 hamt_node_collision_new(int32_t hash, Py_ssize_t size)
1261 {
1262     /* Create a new Collision node. */
1263 
1264     PyHamtNode_Collision *node;
1265     Py_ssize_t i;
1266 
1267     assert(size >= 4);
1268     assert(size % 2 == 0);
1269 
1270     node = PyObject_GC_NewVar(
1271         PyHamtNode_Collision, &_PyHamt_CollisionNode_Type, size);
1272     if (node == NULL) {
1273         return NULL;
1274     }
1275 
1276     for (i = 0; i < size; i++) {
1277         node->c_array[i] = NULL;
1278     }
1279 
1280     Py_SET_SIZE(node, size);
1281     node->c_hash = hash;
1282 
1283     _PyObject_GC_TRACK(node);
1284 
1285     return (PyHamtNode *)node;
1286 }
1287 
1288 static hamt_find_t
hamt_node_collision_find_index(PyHamtNode_Collision * self,PyObject * key,Py_ssize_t * idx)1289 hamt_node_collision_find_index(PyHamtNode_Collision *self, PyObject *key,
1290                                Py_ssize_t *idx)
1291 {
1292     /* Lookup `key` in the Collision node `self`.  Set the index of the
1293        found key to 'idx'. */
1294 
1295     Py_ssize_t i;
1296     PyObject *el;
1297 
1298     for (i = 0; i < Py_SIZE(self); i += 2) {
1299         el = self->c_array[i];
1300 
1301         assert(el != NULL);
1302         int cmp = PyObject_RichCompareBool(key, el, Py_EQ);
1303         if (cmp < 0) {
1304             return F_ERROR;
1305         }
1306         if (cmp == 1) {
1307             *idx = i;
1308             return F_FOUND;
1309         }
1310     }
1311 
1312     return F_NOT_FOUND;
1313 }
1314 
1315 static PyHamtNode *
hamt_node_collision_assoc(PyHamtNode_Collision * self,uint32_t shift,int32_t hash,PyObject * key,PyObject * val,int * added_leaf)1316 hamt_node_collision_assoc(PyHamtNode_Collision *self,
1317                           uint32_t shift, int32_t hash,
1318                           PyObject *key, PyObject *val, int* added_leaf)
1319 {
1320     /* Set a new key to this level (currently a Collision node)
1321        of the tree. */
1322 
1323     if (hash == self->c_hash) {
1324         /* The hash of the 'key' we are adding matches the hash of
1325            other keys in this Collision node. */
1326 
1327         Py_ssize_t key_idx = -1;
1328         hamt_find_t found;
1329         PyHamtNode_Collision *new_node;
1330         Py_ssize_t i;
1331 
1332         /* Let's try to lookup the new 'key', maybe we already have it. */
1333         found = hamt_node_collision_find_index(self, key, &key_idx);
1334         switch (found) {
1335             case F_ERROR:
1336                 /* Exception. */
1337                 return NULL;
1338 
1339             case F_NOT_FOUND:
1340                 /* This is a totally new key.  Clone the current node,
1341                    add a new key/value to the cloned node. */
1342 
1343                 new_node = (PyHamtNode_Collision *)hamt_node_collision_new(
1344                     self->c_hash, Py_SIZE(self) + 2);
1345                 if (new_node == NULL) {
1346                     return NULL;
1347                 }
1348 
1349                 for (i = 0; i < Py_SIZE(self); i++) {
1350                     Py_INCREF(self->c_array[i]);
1351                     new_node->c_array[i] = self->c_array[i];
1352                 }
1353 
1354                 Py_INCREF(key);
1355                 new_node->c_array[i] = key;
1356                 Py_INCREF(val);
1357                 new_node->c_array[i + 1] = val;
1358 
1359                 *added_leaf = 1;
1360                 return (PyHamtNode *)new_node;
1361 
1362             case F_FOUND:
1363                 /* There's a key which is equal to the key we are adding. */
1364 
1365                 assert(key_idx >= 0);
1366                 assert(key_idx < Py_SIZE(self));
1367                 Py_ssize_t val_idx = key_idx + 1;
1368 
1369                 if (self->c_array[val_idx] == val) {
1370                     /* We're setting a key/value pair that's already set. */
1371                     Py_INCREF(self);
1372                     return (PyHamtNode *)self;
1373                 }
1374 
1375                 /* We need to replace old value for the key
1376                    with a new value.  Create a new Collision node.*/
1377                 new_node = (PyHamtNode_Collision *)hamt_node_collision_new(
1378                     self->c_hash, Py_SIZE(self));
1379                 if (new_node == NULL) {
1380                     return NULL;
1381                 }
1382 
1383                 /* Copy all elements of the old node to the new one. */
1384                 for (i = 0; i < Py_SIZE(self); i++) {
1385                     Py_INCREF(self->c_array[i]);
1386                     new_node->c_array[i] = self->c_array[i];
1387                 }
1388 
1389                 /* Replace the old value with the new value for the our key. */
1390                 Py_DECREF(new_node->c_array[val_idx]);
1391                 Py_INCREF(val);
1392                 new_node->c_array[val_idx] = val;
1393 
1394                 return (PyHamtNode *)new_node;
1395 
1396             default:
1397                 Py_UNREACHABLE();
1398         }
1399     }
1400     else {
1401         /* The hash of the new key is different from the hash that
1402            all keys of this Collision node have.
1403 
1404            Create a Bitmap node inplace with two children:
1405            key/value pair that we're adding, and the Collision node
1406            we're replacing on this tree level.
1407         */
1408 
1409         PyHamtNode_Bitmap *new_node;
1410         PyHamtNode *assoc_res;
1411 
1412         new_node = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(2);
1413         if (new_node == NULL) {
1414             return NULL;
1415         }
1416         new_node->b_bitmap = hamt_bitpos(self->c_hash, shift);
1417         Py_INCREF(self);
1418         new_node->b_array[1] = (PyObject*) self;
1419 
1420         assoc_res = hamt_node_bitmap_assoc(
1421             new_node, shift, hash, key, val, added_leaf);
1422         Py_DECREF(new_node);
1423         return assoc_res;
1424     }
1425 }
1426 
1427 static inline Py_ssize_t
hamt_node_collision_count(PyHamtNode_Collision * node)1428 hamt_node_collision_count(PyHamtNode_Collision *node)
1429 {
1430     return Py_SIZE(node) / 2;
1431 }
1432 
1433 static hamt_without_t
hamt_node_collision_without(PyHamtNode_Collision * self,uint32_t shift,int32_t hash,PyObject * key,PyHamtNode ** new_node)1434 hamt_node_collision_without(PyHamtNode_Collision *self,
1435                             uint32_t shift, int32_t hash,
1436                             PyObject *key,
1437                             PyHamtNode **new_node)
1438 {
1439     if (hash != self->c_hash) {
1440         return W_NOT_FOUND;
1441     }
1442 
1443     Py_ssize_t key_idx = -1;
1444     hamt_find_t found = hamt_node_collision_find_index(self, key, &key_idx);
1445 
1446     switch (found) {
1447         case F_ERROR:
1448             return W_ERROR;
1449 
1450         case F_NOT_FOUND:
1451             return W_NOT_FOUND;
1452 
1453         case F_FOUND:
1454             assert(key_idx >= 0);
1455             assert(key_idx < Py_SIZE(self));
1456 
1457             Py_ssize_t new_count = hamt_node_collision_count(self) - 1;
1458 
1459             if (new_count == 0) {
1460                 /* The node has only one key/value pair and it's for the
1461                    key we're trying to delete.  So a new node will be empty
1462                    after the removal.
1463                 */
1464                 return W_EMPTY;
1465             }
1466 
1467             if (new_count == 1) {
1468                 /* The node has two keys, and after deletion the
1469                    new Collision node would have one.  Collision nodes
1470                    with one key shouldn't exist, so convert it to a
1471                    Bitmap node.
1472                 */
1473                 PyHamtNode_Bitmap *node = (PyHamtNode_Bitmap *)
1474                     hamt_node_bitmap_new(2);
1475                 if (node == NULL) {
1476                     return W_ERROR;
1477                 }
1478 
1479                 if (key_idx == 0) {
1480                     Py_INCREF(self->c_array[2]);
1481                     node->b_array[0] = self->c_array[2];
1482                     Py_INCREF(self->c_array[3]);
1483                     node->b_array[1] = self->c_array[3];
1484                 }
1485                 else {
1486                     assert(key_idx == 2);
1487                     Py_INCREF(self->c_array[0]);
1488                     node->b_array[0] = self->c_array[0];
1489                     Py_INCREF(self->c_array[1]);
1490                     node->b_array[1] = self->c_array[1];
1491                 }
1492 
1493                 node->b_bitmap = hamt_bitpos(hash, shift);
1494 
1495                 *new_node = (PyHamtNode *)node;
1496                 return W_NEWNODE;
1497             }
1498 
1499             /* Allocate a new Collision node with capacity for one
1500                less key/value pair */
1501             PyHamtNode_Collision *new = (PyHamtNode_Collision *)
1502                 hamt_node_collision_new(
1503                     self->c_hash, Py_SIZE(self) - 2);
1504             if (new == NULL) {
1505                 return W_ERROR;
1506             }
1507 
1508             /* Copy all other keys from `self` to `new` */
1509             Py_ssize_t i;
1510             for (i = 0; i < key_idx; i++) {
1511                 Py_INCREF(self->c_array[i]);
1512                 new->c_array[i] = self->c_array[i];
1513             }
1514             for (i = key_idx + 2; i < Py_SIZE(self); i++) {
1515                 Py_INCREF(self->c_array[i]);
1516                 new->c_array[i - 2] = self->c_array[i];
1517             }
1518 
1519             *new_node = (PyHamtNode*)new;
1520             return W_NEWNODE;
1521 
1522         default:
1523             Py_UNREACHABLE();
1524     }
1525 }
1526 
1527 static hamt_find_t
hamt_node_collision_find(PyHamtNode_Collision * self,uint32_t shift,int32_t hash,PyObject * key,PyObject ** val)1528 hamt_node_collision_find(PyHamtNode_Collision *self,
1529                          uint32_t shift, int32_t hash,
1530                          PyObject *key, PyObject **val)
1531 {
1532     /* Lookup `key` in the Collision node `self`.  Set the value
1533        for the found key to 'val'. */
1534 
1535     Py_ssize_t idx = -1;
1536     hamt_find_t res;
1537 
1538     res = hamt_node_collision_find_index(self, key, &idx);
1539     if (res == F_ERROR || res == F_NOT_FOUND) {
1540         return res;
1541     }
1542 
1543     assert(idx >= 0);
1544     assert(idx + 1 < Py_SIZE(self));
1545 
1546     *val = self->c_array[idx + 1];
1547     assert(*val != NULL);
1548 
1549     return F_FOUND;
1550 }
1551 
1552 
1553 static int
hamt_node_collision_traverse(PyHamtNode_Collision * self,visitproc visit,void * arg)1554 hamt_node_collision_traverse(PyHamtNode_Collision *self,
1555                              visitproc visit, void *arg)
1556 {
1557     /* Collision's tp_traverse */
1558 
1559     Py_ssize_t i;
1560 
1561     for (i = Py_SIZE(self); --i >= 0; ) {
1562         Py_VISIT(self->c_array[i]);
1563     }
1564 
1565     return 0;
1566 }
1567 
1568 static void
hamt_node_collision_dealloc(PyHamtNode_Collision * self)1569 hamt_node_collision_dealloc(PyHamtNode_Collision *self)
1570 {
1571     /* Collision's tp_dealloc */
1572 
1573     Py_ssize_t len = Py_SIZE(self);
1574 
1575     PyObject_GC_UnTrack(self);
1576     Py_TRASHCAN_BEGIN(self, hamt_node_collision_dealloc)
1577 
1578     if (len > 0) {
1579 
1580         while (--len >= 0) {
1581             Py_XDECREF(self->c_array[len]);
1582         }
1583     }
1584 
1585     Py_TYPE(self)->tp_free((PyObject *)self);
1586     Py_TRASHCAN_END
1587 }
1588 
1589 #ifdef Py_DEBUG
1590 static int
hamt_node_collision_dump(PyHamtNode_Collision * node,_PyUnicodeWriter * writer,int level)1591 hamt_node_collision_dump(PyHamtNode_Collision *node,
1592                          _PyUnicodeWriter *writer, int level)
1593 {
1594     /* Debug build: __dump__() method implementation for Collision nodes. */
1595 
1596     Py_ssize_t i;
1597 
1598     if (_hamt_dump_ident(writer, level + 1)) {
1599         goto error;
1600     }
1601 
1602     if (_hamt_dump_format(writer, "CollisionNode(size=%zd id=%p):\n",
1603                           Py_SIZE(node), node))
1604     {
1605         goto error;
1606     }
1607 
1608     for (i = 0; i < Py_SIZE(node); i += 2) {
1609         PyObject *key = node->c_array[i];
1610         PyObject *val = node->c_array[i + 1];
1611 
1612         if (_hamt_dump_ident(writer, level + 2)) {
1613             goto error;
1614         }
1615 
1616         if (_hamt_dump_format(writer, "%R: %R\n", key, val)) {
1617             goto error;
1618         }
1619     }
1620 
1621     return 0;
1622 error:
1623     return -1;
1624 }
1625 #endif  /* Py_DEBUG */
1626 
1627 
1628 /////////////////////////////////// Array Node
1629 
1630 
1631 static PyHamtNode *
hamt_node_array_new(Py_ssize_t count)1632 hamt_node_array_new(Py_ssize_t count)
1633 {
1634     Py_ssize_t i;
1635 
1636     PyHamtNode_Array *node = PyObject_GC_New(
1637         PyHamtNode_Array, &_PyHamt_ArrayNode_Type);
1638     if (node == NULL) {
1639         return NULL;
1640     }
1641 
1642     for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1643         node->a_array[i] = NULL;
1644     }
1645 
1646     node->a_count = count;
1647 
1648     _PyObject_GC_TRACK(node);
1649     return (PyHamtNode *)node;
1650 }
1651 
1652 static PyHamtNode_Array *
hamt_node_array_clone(PyHamtNode_Array * node)1653 hamt_node_array_clone(PyHamtNode_Array *node)
1654 {
1655     PyHamtNode_Array *clone;
1656     Py_ssize_t i;
1657 
1658     VALIDATE_ARRAY_NODE(node)
1659 
1660     /* Create a new Array node. */
1661     clone = (PyHamtNode_Array *)hamt_node_array_new(node->a_count);
1662     if (clone == NULL) {
1663         return NULL;
1664     }
1665 
1666     /* Copy all elements from the current Array node to the new one. */
1667     for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1668         Py_XINCREF(node->a_array[i]);
1669         clone->a_array[i] = node->a_array[i];
1670     }
1671 
1672     VALIDATE_ARRAY_NODE(clone)
1673     return clone;
1674 }
1675 
1676 static PyHamtNode *
hamt_node_array_assoc(PyHamtNode_Array * self,uint32_t shift,int32_t hash,PyObject * key,PyObject * val,int * added_leaf)1677 hamt_node_array_assoc(PyHamtNode_Array *self,
1678                       uint32_t shift, int32_t hash,
1679                       PyObject *key, PyObject *val, int* added_leaf)
1680 {
1681     /* Set a new key to this level (currently a Collision node)
1682        of the tree.
1683 
1684        Array nodes don't store values, they can only point to
1685        other nodes.  They are simple arrays of 32 BaseNode pointers/
1686      */
1687 
1688     uint32_t idx = hamt_mask(hash, shift);
1689     PyHamtNode *node = self->a_array[idx];
1690     PyHamtNode *child_node;
1691     PyHamtNode_Array *new_node;
1692     Py_ssize_t i;
1693 
1694     if (node == NULL) {
1695         /* There's no child node for the given hash.  Create a new
1696            Bitmap node for this key. */
1697 
1698         PyHamtNode_Bitmap *empty = NULL;
1699 
1700         /* Get an empty Bitmap node to work with. */
1701         empty = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(0);
1702         if (empty == NULL) {
1703             return NULL;
1704         }
1705 
1706         /* Set key/val to the newly created empty Bitmap, thus
1707            creating a new Bitmap node with our key/value pair. */
1708         child_node = hamt_node_bitmap_assoc(
1709             empty,
1710             shift + 5, hash, key, val, added_leaf);
1711         Py_DECREF(empty);
1712         if (child_node == NULL) {
1713             return NULL;
1714         }
1715 
1716         /* Create a new Array node. */
1717         new_node = (PyHamtNode_Array *)hamt_node_array_new(self->a_count + 1);
1718         if (new_node == NULL) {
1719             Py_DECREF(child_node);
1720             return NULL;
1721         }
1722 
1723         /* Copy all elements from the current Array node to the
1724            new one. */
1725         for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1726             Py_XINCREF(self->a_array[i]);
1727             new_node->a_array[i] = self->a_array[i];
1728         }
1729 
1730         assert(new_node->a_array[idx] == NULL);
1731         new_node->a_array[idx] = child_node;  /* borrow */
1732         VALIDATE_ARRAY_NODE(new_node)
1733     }
1734     else {
1735         /* There's a child node for the given hash.
1736            Set the key to it./ */
1737         child_node = hamt_node_assoc(
1738             node, shift + 5, hash, key, val, added_leaf);
1739         if (child_node == NULL) {
1740             return NULL;
1741         }
1742         else if (child_node == (PyHamtNode *)self) {
1743             Py_DECREF(child_node);
1744             return (PyHamtNode *)self;
1745         }
1746 
1747         new_node = hamt_node_array_clone(self);
1748         if (new_node == NULL) {
1749             Py_DECREF(child_node);
1750             return NULL;
1751         }
1752 
1753         Py_SETREF(new_node->a_array[idx], child_node);  /* borrow */
1754         VALIDATE_ARRAY_NODE(new_node)
1755     }
1756 
1757     return (PyHamtNode *)new_node;
1758 }
1759 
1760 static hamt_without_t
hamt_node_array_without(PyHamtNode_Array * self,uint32_t shift,int32_t hash,PyObject * key,PyHamtNode ** new_node)1761 hamt_node_array_without(PyHamtNode_Array *self,
1762                         uint32_t shift, int32_t hash,
1763                         PyObject *key,
1764                         PyHamtNode **new_node)
1765 {
1766     uint32_t idx = hamt_mask(hash, shift);
1767     PyHamtNode *node = self->a_array[idx];
1768 
1769     if (node == NULL) {
1770         return W_NOT_FOUND;
1771     }
1772 
1773     PyHamtNode *sub_node = NULL;
1774     hamt_without_t res = hamt_node_without(
1775         (PyHamtNode *)node,
1776         shift + 5, hash, key, &sub_node);
1777 
1778     switch (res) {
1779         case W_NOT_FOUND:
1780         case W_ERROR:
1781             assert(sub_node == NULL);
1782             return res;
1783 
1784         case W_NEWNODE: {
1785             /* We need to replace a node at the `idx` index.
1786                Clone this node and replace.
1787             */
1788             assert(sub_node != NULL);
1789 
1790             PyHamtNode_Array *clone = hamt_node_array_clone(self);
1791             if (clone == NULL) {
1792                 Py_DECREF(sub_node);
1793                 return W_ERROR;
1794             }
1795 
1796             Py_SETREF(clone->a_array[idx], sub_node);  /* borrow */
1797             *new_node = (PyHamtNode*)clone;  /* borrow */
1798             return W_NEWNODE;
1799         }
1800 
1801         case W_EMPTY: {
1802             assert(sub_node == NULL);
1803             /* We need to remove a node at the `idx` index.
1804                Calculate the size of the replacement Array node.
1805             */
1806             Py_ssize_t new_count = self->a_count - 1;
1807 
1808             if (new_count == 0) {
1809                 return W_EMPTY;
1810             }
1811 
1812             if (new_count >= 16) {
1813                 /* We convert Bitmap nodes to Array nodes, when a
1814                    Bitmap node needs to store more than 15 key/value
1815                    pairs.  So we will create a new Array node if we
1816                    the number of key/values after deletion is still
1817                    greater than 15.
1818                 */
1819 
1820                 PyHamtNode_Array *new = hamt_node_array_clone(self);
1821                 if (new == NULL) {
1822                     return W_ERROR;
1823                 }
1824                 new->a_count = new_count;
1825                 Py_CLEAR(new->a_array[idx]);
1826 
1827                 *new_node = (PyHamtNode*)new;  /* borrow */
1828                 return W_NEWNODE;
1829             }
1830 
1831             /* New Array node would have less than 16 key/value
1832                pairs.  We need to create a replacement Bitmap node. */
1833 
1834             Py_ssize_t bitmap_size = new_count * 2;
1835             uint32_t bitmap = 0;
1836 
1837             PyHamtNode_Bitmap *new = (PyHamtNode_Bitmap *)
1838                 hamt_node_bitmap_new(bitmap_size);
1839             if (new == NULL) {
1840                 return W_ERROR;
1841             }
1842 
1843             Py_ssize_t new_i = 0;
1844             for (uint32_t i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1845                 if (i == idx) {
1846                     /* Skip the node we are deleting. */
1847                     continue;
1848                 }
1849 
1850                 PyHamtNode *node = self->a_array[i];
1851                 if (node == NULL) {
1852                     /* Skip any missing nodes. */
1853                     continue;
1854                 }
1855 
1856                 bitmap |= 1U << i;
1857 
1858                 if (IS_BITMAP_NODE(node)) {
1859                     PyHamtNode_Bitmap *child = (PyHamtNode_Bitmap *)node;
1860 
1861                     if (hamt_node_bitmap_count(child) == 1 &&
1862                             child->b_array[0] != NULL)
1863                     {
1864                         /* node is a Bitmap with one key/value pair, just
1865                            merge it into the new Bitmap node we're building.
1866 
1867                            Note that we don't inline Bitmap nodes that
1868                            have a NULL key -- those nodes point to another
1869                            tree level, and we cannot simply move tree levels
1870                            up or down.
1871                         */
1872                         PyObject *key = child->b_array[0];
1873                         PyObject *val = child->b_array[1];
1874 
1875                         Py_INCREF(key);
1876                         new->b_array[new_i] = key;
1877                         Py_INCREF(val);
1878                         new->b_array[new_i + 1] = val;
1879                     }
1880                     else {
1881                         new->b_array[new_i] = NULL;
1882                         Py_INCREF(node);
1883                         new->b_array[new_i + 1] = (PyObject*)node;
1884                     }
1885                 }
1886                 else {
1887 
1888 #ifdef Py_DEBUG
1889                     if (IS_COLLISION_NODE(node)) {
1890                         Py_ssize_t child_count = hamt_node_collision_count(
1891                             (PyHamtNode_Collision*)node);
1892                         assert(child_count > 1);
1893                     }
1894                     else if (IS_ARRAY_NODE(node)) {
1895                         assert(((PyHamtNode_Array*)node)->a_count >= 16);
1896                     }
1897 #endif
1898 
1899                     /* Just copy the node into our new Bitmap */
1900                     new->b_array[new_i] = NULL;
1901                     Py_INCREF(node);
1902                     new->b_array[new_i + 1] = (PyObject*)node;
1903                 }
1904 
1905                 new_i += 2;
1906             }
1907 
1908             new->b_bitmap = bitmap;
1909             *new_node = (PyHamtNode*)new;  /* borrow */
1910             return W_NEWNODE;
1911         }
1912 
1913         default:
1914             Py_UNREACHABLE();
1915     }
1916 }
1917 
1918 static hamt_find_t
hamt_node_array_find(PyHamtNode_Array * self,uint32_t shift,int32_t hash,PyObject * key,PyObject ** val)1919 hamt_node_array_find(PyHamtNode_Array *self,
1920                      uint32_t shift, int32_t hash,
1921                      PyObject *key, PyObject **val)
1922 {
1923     /* Lookup `key` in the Array node `self`.  Set the value
1924        for the found key to 'val'. */
1925 
1926     uint32_t idx = hamt_mask(hash, shift);
1927     PyHamtNode *node;
1928 
1929     node = self->a_array[idx];
1930     if (node == NULL) {
1931         return F_NOT_FOUND;
1932     }
1933 
1934     /* Dispatch to the generic hamt_node_find */
1935     return hamt_node_find(node, shift + 5, hash, key, val);
1936 }
1937 
1938 static int
hamt_node_array_traverse(PyHamtNode_Array * self,visitproc visit,void * arg)1939 hamt_node_array_traverse(PyHamtNode_Array *self,
1940                          visitproc visit, void *arg)
1941 {
1942     /* Array's tp_traverse */
1943 
1944     Py_ssize_t i;
1945 
1946     for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1947         Py_VISIT(self->a_array[i]);
1948     }
1949 
1950     return 0;
1951 }
1952 
1953 static void
hamt_node_array_dealloc(PyHamtNode_Array * self)1954 hamt_node_array_dealloc(PyHamtNode_Array *self)
1955 {
1956     /* Array's tp_dealloc */
1957 
1958     Py_ssize_t i;
1959 
1960     PyObject_GC_UnTrack(self);
1961     Py_TRASHCAN_BEGIN(self, hamt_node_array_dealloc)
1962 
1963     for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1964         Py_XDECREF(self->a_array[i]);
1965     }
1966 
1967     Py_TYPE(self)->tp_free((PyObject *)self);
1968     Py_TRASHCAN_END
1969 }
1970 
1971 #ifdef Py_DEBUG
1972 static int
hamt_node_array_dump(PyHamtNode_Array * node,_PyUnicodeWriter * writer,int level)1973 hamt_node_array_dump(PyHamtNode_Array *node,
1974                      _PyUnicodeWriter *writer, int level)
1975 {
1976     /* Debug build: __dump__() method implementation for Array nodes. */
1977 
1978     Py_ssize_t i;
1979 
1980     if (_hamt_dump_ident(writer, level + 1)) {
1981         goto error;
1982     }
1983 
1984     if (_hamt_dump_format(writer, "ArrayNode(id=%p):\n", node)) {
1985         goto error;
1986     }
1987 
1988     for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1989         if (node->a_array[i] == NULL) {
1990             continue;
1991         }
1992 
1993         if (_hamt_dump_ident(writer, level + 2)) {
1994             goto error;
1995         }
1996 
1997         if (_hamt_dump_format(writer, "%zd::\n", i)) {
1998             goto error;
1999         }
2000 
2001         if (hamt_node_dump(node->a_array[i], writer, level + 1)) {
2002             goto error;
2003         }
2004 
2005         if (_hamt_dump_format(writer, "\n")) {
2006             goto error;
2007         }
2008     }
2009 
2010     return 0;
2011 error:
2012     return -1;
2013 }
2014 #endif  /* Py_DEBUG */
2015 
2016 
2017 /////////////////////////////////// Node Dispatch
2018 
2019 
2020 static PyHamtNode *
hamt_node_assoc(PyHamtNode * node,uint32_t shift,int32_t hash,PyObject * key,PyObject * val,int * added_leaf)2021 hamt_node_assoc(PyHamtNode *node,
2022                 uint32_t shift, int32_t hash,
2023                 PyObject *key, PyObject *val, int* added_leaf)
2024 {
2025     /* Set key/value to the 'node' starting with the given shift/hash.
2026        Return a new node, or the same node if key/value already
2027        set.
2028 
2029        added_leaf will be set to 1 if key/value wasn't in the
2030        tree before.
2031 
2032        This method automatically dispatches to the suitable
2033        hamt_node_{nodetype}_assoc method.
2034     */
2035 
2036     if (IS_BITMAP_NODE(node)) {
2037         return hamt_node_bitmap_assoc(
2038             (PyHamtNode_Bitmap *)node,
2039             shift, hash, key, val, added_leaf);
2040     }
2041     else if (IS_ARRAY_NODE(node)) {
2042         return hamt_node_array_assoc(
2043             (PyHamtNode_Array *)node,
2044             shift, hash, key, val, added_leaf);
2045     }
2046     else {
2047         assert(IS_COLLISION_NODE(node));
2048         return hamt_node_collision_assoc(
2049             (PyHamtNode_Collision *)node,
2050             shift, hash, key, val, added_leaf);
2051     }
2052 }
2053 
2054 static hamt_without_t
hamt_node_without(PyHamtNode * node,uint32_t shift,int32_t hash,PyObject * key,PyHamtNode ** new_node)2055 hamt_node_without(PyHamtNode *node,
2056                   uint32_t shift, int32_t hash,
2057                   PyObject *key,
2058                   PyHamtNode **new_node)
2059 {
2060     if (IS_BITMAP_NODE(node)) {
2061         return hamt_node_bitmap_without(
2062             (PyHamtNode_Bitmap *)node,
2063             shift, hash, key,
2064             new_node);
2065     }
2066     else if (IS_ARRAY_NODE(node)) {
2067         return hamt_node_array_without(
2068             (PyHamtNode_Array *)node,
2069             shift, hash, key,
2070             new_node);
2071     }
2072     else {
2073         assert(IS_COLLISION_NODE(node));
2074         return hamt_node_collision_without(
2075             (PyHamtNode_Collision *)node,
2076             shift, hash, key,
2077             new_node);
2078     }
2079 }
2080 
2081 static hamt_find_t
hamt_node_find(PyHamtNode * node,uint32_t shift,int32_t hash,PyObject * key,PyObject ** val)2082 hamt_node_find(PyHamtNode *node,
2083                uint32_t shift, int32_t hash,
2084                PyObject *key, PyObject **val)
2085 {
2086     /* Find the key in the node starting with the given shift/hash.
2087 
2088        If a value is found, the result will be set to F_FOUND, and
2089        *val will point to the found value object.
2090 
2091        If a value wasn't found, the result will be set to F_NOT_FOUND.
2092 
2093        If an exception occurs during the call, the result will be F_ERROR.
2094 
2095        This method automatically dispatches to the suitable
2096        hamt_node_{nodetype}_find method.
2097     */
2098 
2099     if (IS_BITMAP_NODE(node)) {
2100         return hamt_node_bitmap_find(
2101             (PyHamtNode_Bitmap *)node,
2102             shift, hash, key, val);
2103 
2104     }
2105     else if (IS_ARRAY_NODE(node)) {
2106         return hamt_node_array_find(
2107             (PyHamtNode_Array *)node,
2108             shift, hash, key, val);
2109     }
2110     else {
2111         assert(IS_COLLISION_NODE(node));
2112         return hamt_node_collision_find(
2113             (PyHamtNode_Collision *)node,
2114             shift, hash, key, val);
2115     }
2116 }
2117 
2118 #ifdef Py_DEBUG
2119 static int
hamt_node_dump(PyHamtNode * node,_PyUnicodeWriter * writer,int level)2120 hamt_node_dump(PyHamtNode *node,
2121                _PyUnicodeWriter *writer, int level)
2122 {
2123     /* Debug build: __dump__() method implementation for a node.
2124 
2125        This method automatically dispatches to the suitable
2126        hamt_node_{nodetype})_dump method.
2127     */
2128 
2129     if (IS_BITMAP_NODE(node)) {
2130         return hamt_node_bitmap_dump(
2131             (PyHamtNode_Bitmap *)node, writer, level);
2132     }
2133     else if (IS_ARRAY_NODE(node)) {
2134         return hamt_node_array_dump(
2135             (PyHamtNode_Array *)node, writer, level);
2136     }
2137     else {
2138         assert(IS_COLLISION_NODE(node));
2139         return hamt_node_collision_dump(
2140             (PyHamtNode_Collision *)node, writer, level);
2141     }
2142 }
2143 #endif  /* Py_DEBUG */
2144 
2145 
2146 /////////////////////////////////// Iterators: Machinery
2147 
2148 
2149 static hamt_iter_t
2150 hamt_iterator_next(PyHamtIteratorState *iter, PyObject **key, PyObject **val);
2151 
2152 
2153 static void
hamt_iterator_init(PyHamtIteratorState * iter,PyHamtNode * root)2154 hamt_iterator_init(PyHamtIteratorState *iter, PyHamtNode *root)
2155 {
2156     for (uint32_t i = 0; i < _Py_HAMT_MAX_TREE_DEPTH; i++) {
2157         iter->i_nodes[i] = NULL;
2158         iter->i_pos[i] = 0;
2159     }
2160 
2161     iter->i_level = 0;
2162 
2163     /* Note: we don't incref/decref nodes in i_nodes. */
2164     iter->i_nodes[0] = root;
2165 }
2166 
2167 static hamt_iter_t
hamt_iterator_bitmap_next(PyHamtIteratorState * iter,PyObject ** key,PyObject ** val)2168 hamt_iterator_bitmap_next(PyHamtIteratorState *iter,
2169                           PyObject **key, PyObject **val)
2170 {
2171     int8_t level = iter->i_level;
2172 
2173     PyHamtNode_Bitmap *node = (PyHamtNode_Bitmap *)(iter->i_nodes[level]);
2174     Py_ssize_t pos = iter->i_pos[level];
2175 
2176     if (pos + 1 >= Py_SIZE(node)) {
2177 #ifdef Py_DEBUG
2178         assert(iter->i_level >= 0);
2179         iter->i_nodes[iter->i_level] = NULL;
2180 #endif
2181         iter->i_level--;
2182         return hamt_iterator_next(iter, key, val);
2183     }
2184 
2185     if (node->b_array[pos] == NULL) {
2186         iter->i_pos[level] = pos + 2;
2187 
2188         int8_t next_level = level + 1;
2189         assert(next_level < _Py_HAMT_MAX_TREE_DEPTH);
2190         iter->i_level = next_level;
2191         iter->i_pos[next_level] = 0;
2192         iter->i_nodes[next_level] = (PyHamtNode *)
2193             node->b_array[pos + 1];
2194 
2195         return hamt_iterator_next(iter, key, val);
2196     }
2197 
2198     *key = node->b_array[pos];
2199     *val = node->b_array[pos + 1];
2200     iter->i_pos[level] = pos + 2;
2201     return I_ITEM;
2202 }
2203 
2204 static hamt_iter_t
hamt_iterator_collision_next(PyHamtIteratorState * iter,PyObject ** key,PyObject ** val)2205 hamt_iterator_collision_next(PyHamtIteratorState *iter,
2206                              PyObject **key, PyObject **val)
2207 {
2208     int8_t level = iter->i_level;
2209 
2210     PyHamtNode_Collision *node = (PyHamtNode_Collision *)(iter->i_nodes[level]);
2211     Py_ssize_t pos = iter->i_pos[level];
2212 
2213     if (pos + 1 >= Py_SIZE(node)) {
2214 #ifdef Py_DEBUG
2215         assert(iter->i_level >= 0);
2216         iter->i_nodes[iter->i_level] = NULL;
2217 #endif
2218         iter->i_level--;
2219         return hamt_iterator_next(iter, key, val);
2220     }
2221 
2222     *key = node->c_array[pos];
2223     *val = node->c_array[pos + 1];
2224     iter->i_pos[level] = pos + 2;
2225     return I_ITEM;
2226 }
2227 
2228 static hamt_iter_t
hamt_iterator_array_next(PyHamtIteratorState * iter,PyObject ** key,PyObject ** val)2229 hamt_iterator_array_next(PyHamtIteratorState *iter,
2230                          PyObject **key, PyObject **val)
2231 {
2232     int8_t level = iter->i_level;
2233 
2234     PyHamtNode_Array *node = (PyHamtNode_Array *)(iter->i_nodes[level]);
2235     Py_ssize_t pos = iter->i_pos[level];
2236 
2237     if (pos >= HAMT_ARRAY_NODE_SIZE) {
2238 #ifdef Py_DEBUG
2239         assert(iter->i_level >= 0);
2240         iter->i_nodes[iter->i_level] = NULL;
2241 #endif
2242         iter->i_level--;
2243         return hamt_iterator_next(iter, key, val);
2244     }
2245 
2246     for (Py_ssize_t i = pos; i < HAMT_ARRAY_NODE_SIZE; i++) {
2247         if (node->a_array[i] != NULL) {
2248             iter->i_pos[level] = i + 1;
2249 
2250             int8_t next_level = level + 1;
2251             assert(next_level < _Py_HAMT_MAX_TREE_DEPTH);
2252             iter->i_pos[next_level] = 0;
2253             iter->i_nodes[next_level] = node->a_array[i];
2254             iter->i_level = next_level;
2255 
2256             return hamt_iterator_next(iter, key, val);
2257         }
2258     }
2259 
2260 #ifdef Py_DEBUG
2261         assert(iter->i_level >= 0);
2262         iter->i_nodes[iter->i_level] = NULL;
2263 #endif
2264 
2265     iter->i_level--;
2266     return hamt_iterator_next(iter, key, val);
2267 }
2268 
2269 static hamt_iter_t
hamt_iterator_next(PyHamtIteratorState * iter,PyObject ** key,PyObject ** val)2270 hamt_iterator_next(PyHamtIteratorState *iter, PyObject **key, PyObject **val)
2271 {
2272     if (iter->i_level < 0) {
2273         return I_END;
2274     }
2275 
2276     assert(iter->i_level < _Py_HAMT_MAX_TREE_DEPTH);
2277 
2278     PyHamtNode *current = iter->i_nodes[iter->i_level];
2279 
2280     if (IS_BITMAP_NODE(current)) {
2281         return hamt_iterator_bitmap_next(iter, key, val);
2282     }
2283     else if (IS_ARRAY_NODE(current)) {
2284         return hamt_iterator_array_next(iter, key, val);
2285     }
2286     else {
2287         assert(IS_COLLISION_NODE(current));
2288         return hamt_iterator_collision_next(iter, key, val);
2289     }
2290 }
2291 
2292 
2293 /////////////////////////////////// HAMT high-level functions
2294 
2295 
2296 PyHamtObject *
_PyHamt_Assoc(PyHamtObject * o,PyObject * key,PyObject * val)2297 _PyHamt_Assoc(PyHamtObject *o, PyObject *key, PyObject *val)
2298 {
2299     int32_t key_hash;
2300     int added_leaf = 0;
2301     PyHamtNode *new_root;
2302     PyHamtObject *new_o;
2303 
2304     key_hash = hamt_hash(key);
2305     if (key_hash == -1) {
2306         return NULL;
2307     }
2308 
2309     new_root = hamt_node_assoc(
2310         (PyHamtNode *)(o->h_root),
2311         0, key_hash, key, val, &added_leaf);
2312     if (new_root == NULL) {
2313         return NULL;
2314     }
2315 
2316     if (new_root == o->h_root) {
2317         Py_DECREF(new_root);
2318         Py_INCREF(o);
2319         return o;
2320     }
2321 
2322     new_o = hamt_alloc();
2323     if (new_o == NULL) {
2324         Py_DECREF(new_root);
2325         return NULL;
2326     }
2327 
2328     new_o->h_root = new_root;  /* borrow */
2329     new_o->h_count = added_leaf ? o->h_count + 1 : o->h_count;
2330 
2331     return new_o;
2332 }
2333 
2334 PyHamtObject *
_PyHamt_Without(PyHamtObject * o,PyObject * key)2335 _PyHamt_Without(PyHamtObject *o, PyObject *key)
2336 {
2337     int32_t key_hash = hamt_hash(key);
2338     if (key_hash == -1) {
2339         return NULL;
2340     }
2341 
2342     PyHamtNode *new_root = NULL;
2343 
2344     hamt_without_t res = hamt_node_without(
2345         (PyHamtNode *)(o->h_root),
2346         0, key_hash, key,
2347         &new_root);
2348 
2349     switch (res) {
2350         case W_ERROR:
2351             return NULL;
2352         case W_EMPTY:
2353             return _PyHamt_New();
2354         case W_NOT_FOUND:
2355             Py_INCREF(o);
2356             return o;
2357         case W_NEWNODE: {
2358             assert(new_root != NULL);
2359 
2360             PyHamtObject *new_o = hamt_alloc();
2361             if (new_o == NULL) {
2362                 Py_DECREF(new_root);
2363                 return NULL;
2364             }
2365 
2366             new_o->h_root = new_root;  /* borrow */
2367             new_o->h_count = o->h_count - 1;
2368             assert(new_o->h_count >= 0);
2369             return new_o;
2370         }
2371         default:
2372             Py_UNREACHABLE();
2373     }
2374 }
2375 
2376 static hamt_find_t
hamt_find(PyHamtObject * o,PyObject * key,PyObject ** val)2377 hamt_find(PyHamtObject *o, PyObject *key, PyObject **val)
2378 {
2379     if (o->h_count == 0) {
2380         return F_NOT_FOUND;
2381     }
2382 
2383     int32_t key_hash = hamt_hash(key);
2384     if (key_hash == -1) {
2385         return F_ERROR;
2386     }
2387 
2388     return hamt_node_find(o->h_root, 0, key_hash, key, val);
2389 }
2390 
2391 
2392 int
_PyHamt_Find(PyHamtObject * o,PyObject * key,PyObject ** val)2393 _PyHamt_Find(PyHamtObject *o, PyObject *key, PyObject **val)
2394 {
2395     hamt_find_t res = hamt_find(o, key, val);
2396     switch (res) {
2397         case F_ERROR:
2398             return -1;
2399         case F_NOT_FOUND:
2400             return 0;
2401         case F_FOUND:
2402             return 1;
2403         default:
2404             Py_UNREACHABLE();
2405     }
2406 }
2407 
2408 
2409 int
_PyHamt_Eq(PyHamtObject * v,PyHamtObject * w)2410 _PyHamt_Eq(PyHamtObject *v, PyHamtObject *w)
2411 {
2412     if (v == w) {
2413         return 1;
2414     }
2415 
2416     if (v->h_count != w->h_count) {
2417         return 0;
2418     }
2419 
2420     PyHamtIteratorState iter;
2421     hamt_iter_t iter_res;
2422     hamt_find_t find_res;
2423     PyObject *v_key;
2424     PyObject *v_val;
2425     PyObject *w_val;
2426 
2427     hamt_iterator_init(&iter, v->h_root);
2428 
2429     do {
2430         iter_res = hamt_iterator_next(&iter, &v_key, &v_val);
2431         if (iter_res == I_ITEM) {
2432             find_res = hamt_find(w, v_key, &w_val);
2433             switch (find_res) {
2434                 case F_ERROR:
2435                     return -1;
2436 
2437                 case F_NOT_FOUND:
2438                     return 0;
2439 
2440                 case F_FOUND: {
2441                     int cmp = PyObject_RichCompareBool(v_val, w_val, Py_EQ);
2442                     if (cmp < 0) {
2443                         return -1;
2444                     }
2445                     if (cmp == 0) {
2446                         return 0;
2447                     }
2448                 }
2449             }
2450         }
2451     } while (iter_res != I_END);
2452 
2453     return 1;
2454 }
2455 
2456 Py_ssize_t
_PyHamt_Len(PyHamtObject * o)2457 _PyHamt_Len(PyHamtObject *o)
2458 {
2459     return o->h_count;
2460 }
2461 
2462 static PyHamtObject *
hamt_alloc(void)2463 hamt_alloc(void)
2464 {
2465     PyHamtObject *o;
2466     o = PyObject_GC_New(PyHamtObject, &_PyHamt_Type);
2467     if (o == NULL) {
2468         return NULL;
2469     }
2470     o->h_count = 0;
2471     o->h_root = NULL;
2472     o->h_weakreflist = NULL;
2473     PyObject_GC_Track(o);
2474     return o;
2475 }
2476 
2477 PyHamtObject *
_PyHamt_New(void)2478 _PyHamt_New(void)
2479 {
2480     if (_empty_hamt != NULL) {
2481         /* HAMT is an immutable object so we can easily cache an
2482            empty instance. */
2483         Py_INCREF(_empty_hamt);
2484         return _empty_hamt;
2485     }
2486 
2487     PyHamtObject *o = hamt_alloc();
2488     if (o == NULL) {
2489         return NULL;
2490     }
2491 
2492     o->h_root = hamt_node_bitmap_new(0);
2493     if (o->h_root == NULL) {
2494         Py_DECREF(o);
2495         return NULL;
2496     }
2497 
2498     o->h_count = 0;
2499 
2500     if (_empty_hamt == NULL) {
2501         Py_INCREF(o);
2502         _empty_hamt = o;
2503     }
2504 
2505     return o;
2506 }
2507 
2508 #ifdef Py_DEBUG
2509 static PyObject *
hamt_dump(PyHamtObject * self)2510 hamt_dump(PyHamtObject *self)
2511 {
2512     _PyUnicodeWriter writer;
2513 
2514     _PyUnicodeWriter_Init(&writer);
2515 
2516     if (_hamt_dump_format(&writer, "HAMT(len=%zd):\n", self->h_count)) {
2517         goto error;
2518     }
2519 
2520     if (hamt_node_dump(self->h_root, &writer, 0)) {
2521         goto error;
2522     }
2523 
2524     return _PyUnicodeWriter_Finish(&writer);
2525 
2526 error:
2527     _PyUnicodeWriter_Dealloc(&writer);
2528     return NULL;
2529 }
2530 #endif  /* Py_DEBUG */
2531 
2532 
2533 /////////////////////////////////// Iterators: Shared Iterator Implementation
2534 
2535 
2536 static int
hamt_baseiter_tp_clear(PyHamtIterator * it)2537 hamt_baseiter_tp_clear(PyHamtIterator *it)
2538 {
2539     Py_CLEAR(it->hi_obj);
2540     return 0;
2541 }
2542 
2543 static void
hamt_baseiter_tp_dealloc(PyHamtIterator * it)2544 hamt_baseiter_tp_dealloc(PyHamtIterator *it)
2545 {
2546     PyObject_GC_UnTrack(it);
2547     (void)hamt_baseiter_tp_clear(it);
2548     PyObject_GC_Del(it);
2549 }
2550 
2551 static int
hamt_baseiter_tp_traverse(PyHamtIterator * it,visitproc visit,void * arg)2552 hamt_baseiter_tp_traverse(PyHamtIterator *it, visitproc visit, void *arg)
2553 {
2554     Py_VISIT(it->hi_obj);
2555     return 0;
2556 }
2557 
2558 static PyObject *
hamt_baseiter_tp_iternext(PyHamtIterator * it)2559 hamt_baseiter_tp_iternext(PyHamtIterator *it)
2560 {
2561     PyObject *key;
2562     PyObject *val;
2563     hamt_iter_t res = hamt_iterator_next(&it->hi_iter, &key, &val);
2564 
2565     switch (res) {
2566         case I_END:
2567             PyErr_SetNone(PyExc_StopIteration);
2568             return NULL;
2569 
2570         case I_ITEM: {
2571             return (*(it->hi_yield))(key, val);
2572         }
2573 
2574         default: {
2575             Py_UNREACHABLE();
2576         }
2577     }
2578 }
2579 
2580 static Py_ssize_t
hamt_baseiter_tp_len(PyHamtIterator * it)2581 hamt_baseiter_tp_len(PyHamtIterator *it)
2582 {
2583     return it->hi_obj->h_count;
2584 }
2585 
2586 static PyMappingMethods PyHamtIterator_as_mapping = {
2587     (lenfunc)hamt_baseiter_tp_len,
2588 };
2589 
2590 static PyObject *
hamt_baseiter_new(PyTypeObject * type,binaryfunc yield,PyHamtObject * o)2591 hamt_baseiter_new(PyTypeObject *type, binaryfunc yield, PyHamtObject *o)
2592 {
2593     PyHamtIterator *it = PyObject_GC_New(PyHamtIterator, type);
2594     if (it == NULL) {
2595         return NULL;
2596     }
2597 
2598     Py_INCREF(o);
2599     it->hi_obj = o;
2600     it->hi_yield = yield;
2601 
2602     hamt_iterator_init(&it->hi_iter, o->h_root);
2603 
2604     return (PyObject*)it;
2605 }
2606 
2607 #define ITERATOR_TYPE_SHARED_SLOTS                              \
2608     .tp_basicsize = sizeof(PyHamtIterator),                     \
2609     .tp_itemsize = 0,                                           \
2610     .tp_as_mapping = &PyHamtIterator_as_mapping,                \
2611     .tp_dealloc = (destructor)hamt_baseiter_tp_dealloc,         \
2612     .tp_getattro = PyObject_GenericGetAttr,                     \
2613     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,        \
2614     .tp_traverse = (traverseproc)hamt_baseiter_tp_traverse,     \
2615     .tp_clear = (inquiry)hamt_baseiter_tp_clear,                \
2616     .tp_iter = PyObject_SelfIter,                               \
2617     .tp_iternext = (iternextfunc)hamt_baseiter_tp_iternext,
2618 
2619 
2620 /////////////////////////////////// _PyHamtItems_Type
2621 
2622 
2623 PyTypeObject _PyHamtItems_Type = {
2624     PyVarObject_HEAD_INIT(NULL, 0)
2625     "items",
2626     ITERATOR_TYPE_SHARED_SLOTS
2627 };
2628 
2629 static PyObject *
hamt_iter_yield_items(PyObject * key,PyObject * val)2630 hamt_iter_yield_items(PyObject *key, PyObject *val)
2631 {
2632     return PyTuple_Pack(2, key, val);
2633 }
2634 
2635 PyObject *
_PyHamt_NewIterItems(PyHamtObject * o)2636 _PyHamt_NewIterItems(PyHamtObject *o)
2637 {
2638     return hamt_baseiter_new(
2639         &_PyHamtItems_Type, hamt_iter_yield_items, o);
2640 }
2641 
2642 
2643 /////////////////////////////////// _PyHamtKeys_Type
2644 
2645 
2646 PyTypeObject _PyHamtKeys_Type = {
2647     PyVarObject_HEAD_INIT(NULL, 0)
2648     "keys",
2649     ITERATOR_TYPE_SHARED_SLOTS
2650 };
2651 
2652 static PyObject *
hamt_iter_yield_keys(PyObject * key,PyObject * val)2653 hamt_iter_yield_keys(PyObject *key, PyObject *val)
2654 {
2655     Py_INCREF(key);
2656     return key;
2657 }
2658 
2659 PyObject *
_PyHamt_NewIterKeys(PyHamtObject * o)2660 _PyHamt_NewIterKeys(PyHamtObject *o)
2661 {
2662     return hamt_baseiter_new(
2663         &_PyHamtKeys_Type, hamt_iter_yield_keys, o);
2664 }
2665 
2666 
2667 /////////////////////////////////// _PyHamtValues_Type
2668 
2669 
2670 PyTypeObject _PyHamtValues_Type = {
2671     PyVarObject_HEAD_INIT(NULL, 0)
2672     "values",
2673     ITERATOR_TYPE_SHARED_SLOTS
2674 };
2675 
2676 static PyObject *
hamt_iter_yield_values(PyObject * key,PyObject * val)2677 hamt_iter_yield_values(PyObject *key, PyObject *val)
2678 {
2679     Py_INCREF(val);
2680     return val;
2681 }
2682 
2683 PyObject *
_PyHamt_NewIterValues(PyHamtObject * o)2684 _PyHamt_NewIterValues(PyHamtObject *o)
2685 {
2686     return hamt_baseiter_new(
2687         &_PyHamtValues_Type, hamt_iter_yield_values, o);
2688 }
2689 
2690 
2691 /////////////////////////////////// _PyHamt_Type
2692 
2693 
2694 #ifdef Py_DEBUG
2695 static PyObject *
2696 hamt_dump(PyHamtObject *self);
2697 #endif
2698 
2699 
2700 static PyObject *
hamt_tp_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2701 hamt_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2702 {
2703     return (PyObject*)_PyHamt_New();
2704 }
2705 
2706 static int
hamt_tp_clear(PyHamtObject * self)2707 hamt_tp_clear(PyHamtObject *self)
2708 {
2709     Py_CLEAR(self->h_root);
2710     return 0;
2711 }
2712 
2713 
2714 static int
hamt_tp_traverse(PyHamtObject * self,visitproc visit,void * arg)2715 hamt_tp_traverse(PyHamtObject *self, visitproc visit, void *arg)
2716 {
2717     Py_VISIT(self->h_root);
2718     return 0;
2719 }
2720 
2721 static void
hamt_tp_dealloc(PyHamtObject * self)2722 hamt_tp_dealloc(PyHamtObject *self)
2723 {
2724     PyObject_GC_UnTrack(self);
2725     if (self->h_weakreflist != NULL) {
2726         PyObject_ClearWeakRefs((PyObject*)self);
2727     }
2728     (void)hamt_tp_clear(self);
2729     Py_TYPE(self)->tp_free(self);
2730 }
2731 
2732 
2733 static PyObject *
hamt_tp_richcompare(PyObject * v,PyObject * w,int op)2734 hamt_tp_richcompare(PyObject *v, PyObject *w, int op)
2735 {
2736     if (!PyHamt_Check(v) || !PyHamt_Check(w) || (op != Py_EQ && op != Py_NE)) {
2737         Py_RETURN_NOTIMPLEMENTED;
2738     }
2739 
2740     int res = _PyHamt_Eq((PyHamtObject *)v, (PyHamtObject *)w);
2741     if (res < 0) {
2742         return NULL;
2743     }
2744 
2745     if (op == Py_NE) {
2746         res = !res;
2747     }
2748 
2749     if (res) {
2750         Py_RETURN_TRUE;
2751     }
2752     else {
2753         Py_RETURN_FALSE;
2754     }
2755 }
2756 
2757 static int
hamt_tp_contains(PyHamtObject * self,PyObject * key)2758 hamt_tp_contains(PyHamtObject *self, PyObject *key)
2759 {
2760     PyObject *val;
2761     return _PyHamt_Find(self, key, &val);
2762 }
2763 
2764 static PyObject *
hamt_tp_subscript(PyHamtObject * self,PyObject * key)2765 hamt_tp_subscript(PyHamtObject *self, PyObject *key)
2766 {
2767     PyObject *val;
2768     hamt_find_t res = hamt_find(self, key, &val);
2769     switch (res) {
2770         case F_ERROR:
2771             return NULL;
2772         case F_FOUND:
2773             Py_INCREF(val);
2774             return val;
2775         case F_NOT_FOUND:
2776             PyErr_SetObject(PyExc_KeyError, key);
2777             return NULL;
2778         default:
2779             Py_UNREACHABLE();
2780     }
2781 }
2782 
2783 static Py_ssize_t
hamt_tp_len(PyHamtObject * self)2784 hamt_tp_len(PyHamtObject *self)
2785 {
2786     return _PyHamt_Len(self);
2787 }
2788 
2789 static PyObject *
hamt_tp_iter(PyHamtObject * self)2790 hamt_tp_iter(PyHamtObject *self)
2791 {
2792     return _PyHamt_NewIterKeys(self);
2793 }
2794 
2795 static PyObject *
hamt_py_set(PyHamtObject * self,PyObject * args)2796 hamt_py_set(PyHamtObject *self, PyObject *args)
2797 {
2798     PyObject *key;
2799     PyObject *val;
2800 
2801     if (!PyArg_UnpackTuple(args, "set", 2, 2, &key, &val)) {
2802         return NULL;
2803     }
2804 
2805     return (PyObject *)_PyHamt_Assoc(self, key, val);
2806 }
2807 
2808 static PyObject *
hamt_py_get(PyHamtObject * self,PyObject * args)2809 hamt_py_get(PyHamtObject *self, PyObject *args)
2810 {
2811     PyObject *key;
2812     PyObject *def = NULL;
2813 
2814     if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &def)) {
2815         return NULL;
2816     }
2817 
2818     PyObject *val = NULL;
2819     hamt_find_t res = hamt_find(self, key, &val);
2820     switch (res) {
2821         case F_ERROR:
2822             return NULL;
2823         case F_FOUND:
2824             Py_INCREF(val);
2825             return val;
2826         case F_NOT_FOUND:
2827             if (def == NULL) {
2828                 Py_RETURN_NONE;
2829             }
2830             Py_INCREF(def);
2831             return def;
2832         default:
2833             Py_UNREACHABLE();
2834     }
2835 }
2836 
2837 static PyObject *
hamt_py_delete(PyHamtObject * self,PyObject * key)2838 hamt_py_delete(PyHamtObject *self, PyObject *key)
2839 {
2840     return (PyObject *)_PyHamt_Without(self, key);
2841 }
2842 
2843 static PyObject *
hamt_py_items(PyHamtObject * self,PyObject * args)2844 hamt_py_items(PyHamtObject *self, PyObject *args)
2845 {
2846     return _PyHamt_NewIterItems(self);
2847 }
2848 
2849 static PyObject *
hamt_py_values(PyHamtObject * self,PyObject * args)2850 hamt_py_values(PyHamtObject *self, PyObject *args)
2851 {
2852     return _PyHamt_NewIterValues(self);
2853 }
2854 
2855 static PyObject *
hamt_py_keys(PyHamtObject * self,PyObject * Py_UNUSED (args))2856 hamt_py_keys(PyHamtObject *self, PyObject *Py_UNUSED(args))
2857 {
2858     return _PyHamt_NewIterKeys(self);
2859 }
2860 
2861 #ifdef Py_DEBUG
2862 static PyObject *
hamt_py_dump(PyHamtObject * self,PyObject * Py_UNUSED (args))2863 hamt_py_dump(PyHamtObject *self, PyObject *Py_UNUSED(args))
2864 {
2865     return hamt_dump(self);
2866 }
2867 #endif
2868 
2869 
2870 static PyMethodDef PyHamt_methods[] = {
2871     {"set", _PyCFunction_CAST(hamt_py_set), METH_VARARGS, NULL},
2872     {"get", _PyCFunction_CAST(hamt_py_get), METH_VARARGS, NULL},
2873     {"delete", _PyCFunction_CAST(hamt_py_delete), METH_O, NULL},
2874     {"items", _PyCFunction_CAST(hamt_py_items), METH_NOARGS, NULL},
2875     {"keys", _PyCFunction_CAST(hamt_py_keys), METH_NOARGS, NULL},
2876     {"values", _PyCFunction_CAST(hamt_py_values), METH_NOARGS, NULL},
2877 #ifdef Py_DEBUG
2878     {"__dump__", _PyCFunction_CAST(hamt_py_dump), METH_NOARGS, NULL},
2879 #endif
2880     {NULL, NULL}
2881 };
2882 
2883 static PySequenceMethods PyHamt_as_sequence = {
2884     0,                                /* sq_length */
2885     0,                                /* sq_concat */
2886     0,                                /* sq_repeat */
2887     0,                                /* sq_item */
2888     0,                                /* sq_slice */
2889     0,                                /* sq_ass_item */
2890     0,                                /* sq_ass_slice */
2891     (objobjproc)hamt_tp_contains,     /* sq_contains */
2892     0,                                /* sq_inplace_concat */
2893     0,                                /* sq_inplace_repeat */
2894 };
2895 
2896 static PyMappingMethods PyHamt_as_mapping = {
2897     (lenfunc)hamt_tp_len,             /* mp_length */
2898     (binaryfunc)hamt_tp_subscript,    /* mp_subscript */
2899 };
2900 
2901 PyTypeObject _PyHamt_Type = {
2902     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2903     "hamt",
2904     sizeof(PyHamtObject),
2905     .tp_methods = PyHamt_methods,
2906     .tp_as_mapping = &PyHamt_as_mapping,
2907     .tp_as_sequence = &PyHamt_as_sequence,
2908     .tp_iter = (getiterfunc)hamt_tp_iter,
2909     .tp_dealloc = (destructor)hamt_tp_dealloc,
2910     .tp_getattro = PyObject_GenericGetAttr,
2911     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2912     .tp_richcompare = hamt_tp_richcompare,
2913     .tp_traverse = (traverseproc)hamt_tp_traverse,
2914     .tp_clear = (inquiry)hamt_tp_clear,
2915     .tp_new = hamt_tp_new,
2916     .tp_weaklistoffset = offsetof(PyHamtObject, h_weakreflist),
2917     .tp_hash = PyObject_HashNotImplemented,
2918 };
2919 
2920 
2921 /////////////////////////////////// Tree Node Types
2922 
2923 
2924 PyTypeObject _PyHamt_ArrayNode_Type = {
2925     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2926     "hamt_array_node",
2927     sizeof(PyHamtNode_Array),
2928     0,
2929     .tp_dealloc = (destructor)hamt_node_array_dealloc,
2930     .tp_getattro = PyObject_GenericGetAttr,
2931     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2932     .tp_traverse = (traverseproc)hamt_node_array_traverse,
2933     .tp_free = PyObject_GC_Del,
2934     .tp_hash = PyObject_HashNotImplemented,
2935 };
2936 
2937 PyTypeObject _PyHamt_BitmapNode_Type = {
2938     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2939     "hamt_bitmap_node",
2940     sizeof(PyHamtNode_Bitmap) - sizeof(PyObject *),
2941     sizeof(PyObject *),
2942     .tp_dealloc = (destructor)hamt_node_bitmap_dealloc,
2943     .tp_getattro = PyObject_GenericGetAttr,
2944     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2945     .tp_traverse = (traverseproc)hamt_node_bitmap_traverse,
2946     .tp_free = PyObject_GC_Del,
2947     .tp_hash = PyObject_HashNotImplemented,
2948 };
2949 
2950 PyTypeObject _PyHamt_CollisionNode_Type = {
2951     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2952     "hamt_collision_node",
2953     sizeof(PyHamtNode_Collision) - sizeof(PyObject *),
2954     sizeof(PyObject *),
2955     .tp_dealloc = (destructor)hamt_node_collision_dealloc,
2956     .tp_getattro = PyObject_GenericGetAttr,
2957     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2958     .tp_traverse = (traverseproc)hamt_node_collision_traverse,
2959     .tp_free = PyObject_GC_Del,
2960     .tp_hash = PyObject_HashNotImplemented,
2961 };
2962 
2963 
2964 void
_PyHamt_Fini(PyInterpreterState * interp)2965 _PyHamt_Fini(PyInterpreterState *interp)
2966 {
2967     Py_CLEAR(_empty_hamt);
2968     Py_CLEAR(_empty_bitmap_node);
2969 }
2970