1 
2 /* set object implementation
3 
4    Written and maintained by Raymond D. Hettinger <[email protected]>
5    Derived from Lib/sets.py and Objects/dictobject.c.
6 
7    The basic lookup function used by all operations.
8    This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
9 
10    The initial probe index is computed as hash mod the table size.
11    Subsequent probe indices are computed as explained in Objects/dictobject.c.
12 
13    To improve cache locality, each probe inspects a series of consecutive
14    nearby entries before moving on to probes elsewhere in memory.  This leaves
15    us with a hybrid of linear probing and randomized probing.  The linear probing
16    reduces the cost of hash collisions because consecutive memory accesses
17    tend to be much cheaper than scattered probes.  After LINEAR_PROBES steps,
18    we then use more of the upper bits from the hash value and apply a simple
19    linear congruential random number generator.  This helps break-up long
20    chains of collisions.
21 
22    All arithmetic on hash should ignore overflow.
23 
24    Unlike the dictionary implementation, the lookkey function can return
25    NULL if the rich comparison returns an error.
26 
27    Use cases for sets differ considerably from dictionaries where looked-up
28    keys are more likely to be present.  In contrast, sets are primarily
29    about membership testing where the presence of an element is not known in
30    advance.  Accordingly, the set implementation needs to optimize for both
31    the found and not-found case.
32 */
33 
34 #include "Python.h"
35 #include "pycore_object.h"        // _PyObject_GC_UNTRACK()
36 #include <stddef.h>               // offsetof()
37 
38 /* Object used as dummy key to fill deleted entries */
39 static PyObject _dummy_struct;
40 
41 #define dummy (&_dummy_struct)
42 
43 
44 /* ======================================================================== */
45 /* ======= Begin logic for probing the hash table ========================= */
46 
47 /* Set this to zero to turn-off linear probing */
48 #ifndef LINEAR_PROBES
49 #define LINEAR_PROBES 9
50 #endif
51 
52 /* This must be >= 1 */
53 #define PERTURB_SHIFT 5
54 
55 static setentry *
set_lookkey(PySetObject * so,PyObject * key,Py_hash_t hash)56 set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
57 {
58     setentry *table;
59     setentry *entry;
60     size_t perturb = hash;
61     size_t mask = so->mask;
62     size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior */
63     int probes;
64     int cmp;
65 
66     while (1) {
67         entry = &so->table[i];
68         probes = (i + LINEAR_PROBES <= mask) ? LINEAR_PROBES: 0;
69         do {
70             if (entry->hash == 0 && entry->key == NULL)
71                 return entry;
72             if (entry->hash == hash) {
73                 PyObject *startkey = entry->key;
74                 assert(startkey != dummy);
75                 if (startkey == key)
76                     return entry;
77                 if (PyUnicode_CheckExact(startkey)
78                     && PyUnicode_CheckExact(key)
79                     && _PyUnicode_EQ(startkey, key))
80                     return entry;
81                 table = so->table;
82                 Py_INCREF(startkey);
83                 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
84                 Py_DECREF(startkey);
85                 if (cmp < 0)
86                     return NULL;
87                 if (table != so->table || entry->key != startkey)
88                     return set_lookkey(so, key, hash);
89                 if (cmp > 0)
90                     return entry;
91                 mask = so->mask;
92             }
93             entry++;
94         } while (probes--);
95         perturb >>= PERTURB_SHIFT;
96         i = (i * 5 + 1 + perturb) & mask;
97     }
98 }
99 
100 static int set_table_resize(PySetObject *, Py_ssize_t);
101 
102 static int
set_add_entry(PySetObject * so,PyObject * key,Py_hash_t hash)103 set_add_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
104 {
105     setentry *table;
106     setentry *freeslot;
107     setentry *entry;
108     size_t perturb;
109     size_t mask;
110     size_t i;                       /* Unsigned for defined overflow behavior */
111     int probes;
112     int cmp;
113 
114     /* Pre-increment is necessary to prevent arbitrary code in the rich
115        comparison from deallocating the key just before the insertion. */
116     Py_INCREF(key);
117 
118   restart:
119 
120     mask = so->mask;
121     i = (size_t)hash & mask;
122     freeslot = NULL;
123     perturb = hash;
124 
125     while (1) {
126         entry = &so->table[i];
127         probes = (i + LINEAR_PROBES <= mask) ? LINEAR_PROBES: 0;
128         do {
129             if (entry->hash == 0 && entry->key == NULL)
130                 goto found_unused_or_dummy;
131             if (entry->hash == hash) {
132                 PyObject *startkey = entry->key;
133                 assert(startkey != dummy);
134                 if (startkey == key)
135                     goto found_active;
136                 if (PyUnicode_CheckExact(startkey)
137                     && PyUnicode_CheckExact(key)
138                     && _PyUnicode_EQ(startkey, key))
139                     goto found_active;
140                 table = so->table;
141                 Py_INCREF(startkey);
142                 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
143                 Py_DECREF(startkey);
144                 if (cmp > 0)
145                     goto found_active;
146                 if (cmp < 0)
147                     goto comparison_error;
148                 if (table != so->table || entry->key != startkey)
149                     goto restart;
150                 mask = so->mask;
151             }
152             else if (entry->hash == -1) {
153                 assert (entry->key == dummy);
154                 freeslot = entry;
155             }
156             entry++;
157         } while (probes--);
158         perturb >>= PERTURB_SHIFT;
159         i = (i * 5 + 1 + perturb) & mask;
160     }
161 
162   found_unused_or_dummy:
163     if (freeslot == NULL)
164         goto found_unused;
165     so->used++;
166     freeslot->key = key;
167     freeslot->hash = hash;
168     return 0;
169 
170   found_unused:
171     so->fill++;
172     so->used++;
173     entry->key = key;
174     entry->hash = hash;
175     if ((size_t)so->fill*5 < mask*3)
176         return 0;
177     return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
178 
179   found_active:
180     Py_DECREF(key);
181     return 0;
182 
183   comparison_error:
184     Py_DECREF(key);
185     return -1;
186 }
187 
188 /*
189 Internal routine used by set_table_resize() to insert an item which is
190 known to be absent from the set.  Besides the performance benefit,
191 there is also safety benefit since using set_add_entry() risks making
192 a callback in the middle of a set_table_resize(), see issue 1456209.
193 The caller is responsible for updating the key's reference count and
194 the setobject's fill and used fields.
195 */
196 static void
set_insert_clean(setentry * table,size_t mask,PyObject * key,Py_hash_t hash)197 set_insert_clean(setentry *table, size_t mask, PyObject *key, Py_hash_t hash)
198 {
199     setentry *entry;
200     size_t perturb = hash;
201     size_t i = (size_t)hash & mask;
202     size_t j;
203 
204     while (1) {
205         entry = &table[i];
206         if (entry->key == NULL)
207             goto found_null;
208         if (i + LINEAR_PROBES <= mask) {
209             for (j = 0; j < LINEAR_PROBES; j++) {
210                 entry++;
211                 if (entry->key == NULL)
212                     goto found_null;
213             }
214         }
215         perturb >>= PERTURB_SHIFT;
216         i = (i * 5 + 1 + perturb) & mask;
217     }
218   found_null:
219     entry->key = key;
220     entry->hash = hash;
221 }
222 
223 /* ======== End logic for probing the hash table ========================== */
224 /* ======================================================================== */
225 
226 /*
227 Restructure the table by allocating a new table and reinserting all
228 keys again.  When entries have been deleted, the new table may
229 actually be smaller than the old one.
230 */
231 static int
set_table_resize(PySetObject * so,Py_ssize_t minused)232 set_table_resize(PySetObject *so, Py_ssize_t minused)
233 {
234     setentry *oldtable, *newtable, *entry;
235     Py_ssize_t oldmask = so->mask;
236     size_t newmask;
237     int is_oldtable_malloced;
238     setentry small_copy[PySet_MINSIZE];
239 
240     assert(minused >= 0);
241 
242     /* Find the smallest table size > minused. */
243     /* XXX speed-up with intrinsics */
244     size_t newsize = PySet_MINSIZE;
245     while (newsize <= (size_t)minused) {
246         newsize <<= 1; // The largest possible value is PY_SSIZE_T_MAX + 1.
247     }
248 
249     /* Get space for a new table. */
250     oldtable = so->table;
251     assert(oldtable != NULL);
252     is_oldtable_malloced = oldtable != so->smalltable;
253 
254     if (newsize == PySet_MINSIZE) {
255         /* A large table is shrinking, or we can't get any smaller. */
256         newtable = so->smalltable;
257         if (newtable == oldtable) {
258             if (so->fill == so->used) {
259                 /* No dummies, so no point doing anything. */
260                 return 0;
261             }
262             /* We're not going to resize it, but rebuild the
263                table anyway to purge old dummy entries.
264                Subtle:  This is *necessary* if fill==size,
265                as set_lookkey needs at least one virgin slot to
266                terminate failing searches.  If fill < size, it's
267                merely desirable, as dummies slow searches. */
268             assert(so->fill > so->used);
269             memcpy(small_copy, oldtable, sizeof(small_copy));
270             oldtable = small_copy;
271         }
272     }
273     else {
274         newtable = PyMem_NEW(setentry, newsize);
275         if (newtable == NULL) {
276             PyErr_NoMemory();
277             return -1;
278         }
279     }
280 
281     /* Make the set empty, using the new table. */
282     assert(newtable != oldtable);
283     memset(newtable, 0, sizeof(setentry) * newsize);
284     so->mask = newsize - 1;
285     so->table = newtable;
286 
287     /* Copy the data over; this is refcount-neutral for active entries;
288        dummy entries aren't copied over, of course */
289     newmask = (size_t)so->mask;
290     if (so->fill == so->used) {
291         for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
292             if (entry->key != NULL) {
293                 set_insert_clean(newtable, newmask, entry->key, entry->hash);
294             }
295         }
296     } else {
297         so->fill = so->used;
298         for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
299             if (entry->key != NULL && entry->key != dummy) {
300                 set_insert_clean(newtable, newmask, entry->key, entry->hash);
301             }
302         }
303     }
304 
305     if (is_oldtable_malloced)
306         PyMem_Free(oldtable);
307     return 0;
308 }
309 
310 static int
set_contains_entry(PySetObject * so,PyObject * key,Py_hash_t hash)311 set_contains_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
312 {
313     setentry *entry;
314 
315     entry = set_lookkey(so, key, hash);
316     if (entry != NULL)
317         return entry->key != NULL;
318     return -1;
319 }
320 
321 #define DISCARD_NOTFOUND 0
322 #define DISCARD_FOUND 1
323 
324 static int
set_discard_entry(PySetObject * so,PyObject * key,Py_hash_t hash)325 set_discard_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
326 {
327     setentry *entry;
328     PyObject *old_key;
329 
330     entry = set_lookkey(so, key, hash);
331     if (entry == NULL)
332         return -1;
333     if (entry->key == NULL)
334         return DISCARD_NOTFOUND;
335     old_key = entry->key;
336     entry->key = dummy;
337     entry->hash = -1;
338     so->used--;
339     Py_DECREF(old_key);
340     return DISCARD_FOUND;
341 }
342 
343 static int
set_add_key(PySetObject * so,PyObject * key)344 set_add_key(PySetObject *so, PyObject *key)
345 {
346     Py_hash_t hash;
347 
348     if (!PyUnicode_CheckExact(key) ||
349         (hash = _PyASCIIObject_CAST(key)->hash) == -1) {
350         hash = PyObject_Hash(key);
351         if (hash == -1)
352             return -1;
353     }
354     return set_add_entry(so, key, hash);
355 }
356 
357 static int
set_contains_key(PySetObject * so,PyObject * key)358 set_contains_key(PySetObject *so, PyObject *key)
359 {
360     Py_hash_t hash;
361 
362     if (!PyUnicode_CheckExact(key) ||
363         (hash = _PyASCIIObject_CAST(key)->hash) == -1) {
364         hash = PyObject_Hash(key);
365         if (hash == -1)
366             return -1;
367     }
368     return set_contains_entry(so, key, hash);
369 }
370 
371 static int
set_discard_key(PySetObject * so,PyObject * key)372 set_discard_key(PySetObject *so, PyObject *key)
373 {
374     Py_hash_t hash;
375 
376     if (!PyUnicode_CheckExact(key) ||
377         (hash = _PyASCIIObject_CAST(key)->hash) == -1) {
378         hash = PyObject_Hash(key);
379         if (hash == -1)
380             return -1;
381     }
382     return set_discard_entry(so, key, hash);
383 }
384 
385 static void
set_empty_to_minsize(PySetObject * so)386 set_empty_to_minsize(PySetObject *so)
387 {
388     memset(so->smalltable, 0, sizeof(so->smalltable));
389     so->fill = 0;
390     so->used = 0;
391     so->mask = PySet_MINSIZE - 1;
392     so->table = so->smalltable;
393     so->hash = -1;
394 }
395 
396 static int
set_clear_internal(PySetObject * so)397 set_clear_internal(PySetObject *so)
398 {
399     setentry *entry;
400     setentry *table = so->table;
401     Py_ssize_t fill = so->fill;
402     Py_ssize_t used = so->used;
403     int table_is_malloced = table != so->smalltable;
404     setentry small_copy[PySet_MINSIZE];
405 
406     assert (PyAnySet_Check(so));
407     assert(table != NULL);
408 
409     /* This is delicate.  During the process of clearing the set,
410      * decrefs can cause the set to mutate.  To avoid fatal confusion
411      * (voice of experience), we have to make the set empty before
412      * clearing the slots, and never refer to anything via so->ref while
413      * clearing.
414      */
415     if (table_is_malloced)
416         set_empty_to_minsize(so);
417 
418     else if (fill > 0) {
419         /* It's a small table with something that needs to be cleared.
420          * Afraid the only safe way is to copy the set entries into
421          * another small table first.
422          */
423         memcpy(small_copy, table, sizeof(small_copy));
424         table = small_copy;
425         set_empty_to_minsize(so);
426     }
427     /* else it's a small table that's already empty */
428 
429     /* Now we can finally clear things.  If C had refcounts, we could
430      * assert that the refcount on table is 1 now, i.e. that this function
431      * has unique access to it, so decref side-effects can't alter it.
432      */
433     for (entry = table; used > 0; entry++) {
434         if (entry->key && entry->key != dummy) {
435             used--;
436             Py_DECREF(entry->key);
437         }
438     }
439 
440     if (table_is_malloced)
441         PyMem_Free(table);
442     return 0;
443 }
444 
445 /*
446  * Iterate over a set table.  Use like so:
447  *
448  *     Py_ssize_t pos;
449  *     setentry *entry;
450  *     pos = 0;   # important!  pos should not otherwise be changed by you
451  *     while (set_next(yourset, &pos, &entry)) {
452  *              Refer to borrowed reference in entry->key.
453  *     }
454  *
455  * CAUTION:  In general, it isn't safe to use set_next in a loop that
456  * mutates the table.
457  */
458 static int
set_next(PySetObject * so,Py_ssize_t * pos_ptr,setentry ** entry_ptr)459 set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr)
460 {
461     Py_ssize_t i;
462     Py_ssize_t mask;
463     setentry *entry;
464 
465     assert (PyAnySet_Check(so));
466     i = *pos_ptr;
467     assert(i >= 0);
468     mask = so->mask;
469     entry = &so->table[i];
470     while (i <= mask && (entry->key == NULL || entry->key == dummy)) {
471         i++;
472         entry++;
473     }
474     *pos_ptr = i+1;
475     if (i > mask)
476         return 0;
477     assert(entry != NULL);
478     *entry_ptr = entry;
479     return 1;
480 }
481 
482 static void
set_dealloc(PySetObject * so)483 set_dealloc(PySetObject *so)
484 {
485     setentry *entry;
486     Py_ssize_t used = so->used;
487 
488     /* bpo-31095: UnTrack is needed before calling any callbacks */
489     PyObject_GC_UnTrack(so);
490     Py_TRASHCAN_BEGIN(so, set_dealloc)
491     if (so->weakreflist != NULL)
492         PyObject_ClearWeakRefs((PyObject *) so);
493 
494     for (entry = so->table; used > 0; entry++) {
495         if (entry->key && entry->key != dummy) {
496                 used--;
497                 Py_DECREF(entry->key);
498         }
499     }
500     if (so->table != so->smalltable)
501         PyMem_Free(so->table);
502     Py_TYPE(so)->tp_free(so);
503     Py_TRASHCAN_END
504 }
505 
506 static PyObject *
set_repr(PySetObject * so)507 set_repr(PySetObject *so)
508 {
509     PyObject *result=NULL, *keys, *listrepr, *tmp;
510     int status = Py_ReprEnter((PyObject*)so);
511 
512     if (status != 0) {
513         if (status < 0)
514             return NULL;
515         return PyUnicode_FromFormat("%s(...)", Py_TYPE(so)->tp_name);
516     }
517 
518     /* shortcut for the empty set */
519     if (!so->used) {
520         Py_ReprLeave((PyObject*)so);
521         return PyUnicode_FromFormat("%s()", Py_TYPE(so)->tp_name);
522     }
523 
524     keys = PySequence_List((PyObject *)so);
525     if (keys == NULL)
526         goto done;
527 
528     /* repr(keys)[1:-1] */
529     listrepr = PyObject_Repr(keys);
530     Py_DECREF(keys);
531     if (listrepr == NULL)
532         goto done;
533     tmp = PyUnicode_Substring(listrepr, 1, PyUnicode_GET_LENGTH(listrepr)-1);
534     Py_DECREF(listrepr);
535     if (tmp == NULL)
536         goto done;
537     listrepr = tmp;
538 
539     if (!PySet_CheckExact(so))
540         result = PyUnicode_FromFormat("%s({%U})",
541                                       Py_TYPE(so)->tp_name,
542                                       listrepr);
543     else
544         result = PyUnicode_FromFormat("{%U}", listrepr);
545     Py_DECREF(listrepr);
546 done:
547     Py_ReprLeave((PyObject*)so);
548     return result;
549 }
550 
551 static Py_ssize_t
set_len(PyObject * so)552 set_len(PyObject *so)
553 {
554     return ((PySetObject *)so)->used;
555 }
556 
557 static int
set_merge(PySetObject * so,PyObject * otherset)558 set_merge(PySetObject *so, PyObject *otherset)
559 {
560     PySetObject *other;
561     PyObject *key;
562     Py_ssize_t i;
563     setentry *so_entry;
564     setentry *other_entry;
565 
566     assert (PyAnySet_Check(so));
567     assert (PyAnySet_Check(otherset));
568 
569     other = (PySetObject*)otherset;
570     if (other == so || other->used == 0)
571         /* a.update(a) or a.update(set()); nothing to do */
572         return 0;
573     /* Do one big resize at the start, rather than
574      * incrementally resizing as we insert new keys.  Expect
575      * that there will be no (or few) overlapping keys.
576      */
577     if ((so->fill + other->used)*5 >= so->mask*3) {
578         if (set_table_resize(so, (so->used + other->used)*2) != 0)
579             return -1;
580     }
581     so_entry = so->table;
582     other_entry = other->table;
583 
584     /* If our table is empty, and both tables have the same size, and
585        there are no dummies to eliminate, then just copy the pointers. */
586     if (so->fill == 0 && so->mask == other->mask && other->fill == other->used) {
587         for (i = 0; i <= other->mask; i++, so_entry++, other_entry++) {
588             key = other_entry->key;
589             if (key != NULL) {
590                 assert(so_entry->key == NULL);
591                 Py_INCREF(key);
592                 so_entry->key = key;
593                 so_entry->hash = other_entry->hash;
594             }
595         }
596         so->fill = other->fill;
597         so->used = other->used;
598         return 0;
599     }
600 
601     /* If our table is empty, we can use set_insert_clean() */
602     if (so->fill == 0) {
603         setentry *newtable = so->table;
604         size_t newmask = (size_t)so->mask;
605         so->fill = other->used;
606         so->used = other->used;
607         for (i = other->mask + 1; i > 0 ; i--, other_entry++) {
608             key = other_entry->key;
609             if (key != NULL && key != dummy) {
610                 Py_INCREF(key);
611                 set_insert_clean(newtable, newmask, key, other_entry->hash);
612             }
613         }
614         return 0;
615     }
616 
617     /* We can't assure there are no duplicates, so do normal insertions */
618     for (i = 0; i <= other->mask; i++) {
619         other_entry = &other->table[i];
620         key = other_entry->key;
621         if (key != NULL && key != dummy) {
622             if (set_add_entry(so, key, other_entry->hash))
623                 return -1;
624         }
625     }
626     return 0;
627 }
628 
629 static PyObject *
set_pop(PySetObject * so,PyObject * Py_UNUSED (ignored))630 set_pop(PySetObject *so, PyObject *Py_UNUSED(ignored))
631 {
632     /* Make sure the search finger is in bounds */
633     setentry *entry = so->table + (so->finger & so->mask);
634     setentry *limit = so->table + so->mask;
635     PyObject *key;
636 
637     if (so->used == 0) {
638         PyErr_SetString(PyExc_KeyError, "pop from an empty set");
639         return NULL;
640     }
641     while (entry->key == NULL || entry->key==dummy) {
642         entry++;
643         if (entry > limit)
644             entry = so->table;
645     }
646     key = entry->key;
647     entry->key = dummy;
648     entry->hash = -1;
649     so->used--;
650     so->finger = entry - so->table + 1;   /* next place to start */
651     return key;
652 }
653 
654 PyDoc_STRVAR(pop_doc, "Remove and return an arbitrary set element.\n\
655 Raises KeyError if the set is empty.");
656 
657 static int
set_traverse(PySetObject * so,visitproc visit,void * arg)658 set_traverse(PySetObject *so, visitproc visit, void *arg)
659 {
660     Py_ssize_t pos = 0;
661     setentry *entry;
662 
663     while (set_next(so, &pos, &entry))
664         Py_VISIT(entry->key);
665     return 0;
666 }
667 
668 /* Work to increase the bit dispersion for closely spaced hash values.
669    This is important because some use cases have many combinations of a
670    small number of elements with nearby hashes so that many distinct
671    combinations collapse to only a handful of distinct hash values. */
672 
673 static Py_uhash_t
_shuffle_bits(Py_uhash_t h)674 _shuffle_bits(Py_uhash_t h)
675 {
676     return ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL;
677 }
678 
679 /* Most of the constants in this hash algorithm are randomly chosen
680    large primes with "interesting bit patterns" and that passed tests
681    for good collision statistics on a variety of problematic datasets
682    including powersets and graph structures (such as David Eppstein's
683    graph recipes in Lib/test/test_set.py) */
684 
685 static Py_hash_t
frozenset_hash(PyObject * self)686 frozenset_hash(PyObject *self)
687 {
688     PySetObject *so = (PySetObject *)self;
689     Py_uhash_t hash = 0;
690     setentry *entry;
691 
692     if (so->hash != -1)
693         return so->hash;
694 
695     /* Xor-in shuffled bits from every entry's hash field because xor is
696        commutative and a frozenset hash should be independent of order.
697 
698        For speed, include null entries and dummy entries and then
699        subtract out their effect afterwards so that the final hash
700        depends only on active entries.  This allows the code to be
701        vectorized by the compiler and it saves the unpredictable
702        branches that would arise when trying to exclude null and dummy
703        entries on every iteration. */
704 
705     for (entry = so->table; entry <= &so->table[so->mask]; entry++)
706         hash ^= _shuffle_bits(entry->hash);
707 
708     /* Remove the effect of an odd number of NULL entries */
709     if ((so->mask + 1 - so->fill) & 1)
710         hash ^= _shuffle_bits(0);
711 
712     /* Remove the effect of an odd number of dummy entries */
713     if ((so->fill - so->used) & 1)
714         hash ^= _shuffle_bits(-1);
715 
716     /* Factor in the number of active entries */
717     hash ^= ((Py_uhash_t)PySet_GET_SIZE(self) + 1) * 1927868237UL;
718 
719     /* Disperse patterns arising in nested frozensets */
720     hash ^= (hash >> 11) ^ (hash >> 25);
721     hash = hash * 69069U + 907133923UL;
722 
723     /* -1 is reserved as an error code */
724     if (hash == (Py_uhash_t)-1)
725         hash = 590923713UL;
726 
727     so->hash = hash;
728     return hash;
729 }
730 
731 /***** Set iterator type ***********************************************/
732 
733 typedef struct {
734     PyObject_HEAD
735     PySetObject *si_set; /* Set to NULL when iterator is exhausted */
736     Py_ssize_t si_used;
737     Py_ssize_t si_pos;
738     Py_ssize_t len;
739 } setiterobject;
740 
741 static void
setiter_dealloc(setiterobject * si)742 setiter_dealloc(setiterobject *si)
743 {
744     /* bpo-31095: UnTrack is needed before calling any callbacks */
745     _PyObject_GC_UNTRACK(si);
746     Py_XDECREF(si->si_set);
747     PyObject_GC_Del(si);
748 }
749 
750 static int
setiter_traverse(setiterobject * si,visitproc visit,void * arg)751 setiter_traverse(setiterobject *si, visitproc visit, void *arg)
752 {
753     Py_VISIT(si->si_set);
754     return 0;
755 }
756 
757 static PyObject *
setiter_len(setiterobject * si,PyObject * Py_UNUSED (ignored))758 setiter_len(setiterobject *si, PyObject *Py_UNUSED(ignored))
759 {
760     Py_ssize_t len = 0;
761     if (si->si_set != NULL && si->si_used == si->si_set->used)
762         len = si->len;
763     return PyLong_FromSsize_t(len);
764 }
765 
766 PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
767 
768 static PyObject *setiter_iternext(setiterobject *si);
769 
770 static PyObject *
setiter_reduce(setiterobject * si,PyObject * Py_UNUSED (ignored))771 setiter_reduce(setiterobject *si, PyObject *Py_UNUSED(ignored))
772 {
773     /* copy the iterator state */
774     setiterobject tmp = *si;
775     Py_XINCREF(tmp.si_set);
776 
777     /* iterate the temporary into a list */
778     PyObject *list = PySequence_List((PyObject*)&tmp);
779     Py_XDECREF(tmp.si_set);
780     if (list == NULL) {
781         return NULL;
782     }
783     return Py_BuildValue("N(N)", _PyEval_GetBuiltin(&_Py_ID(iter)), list);
784 }
785 
786 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
787 
788 static PyMethodDef setiter_methods[] = {
789     {"__length_hint__", (PyCFunction)setiter_len, METH_NOARGS, length_hint_doc},
790     {"__reduce__", (PyCFunction)setiter_reduce, METH_NOARGS, reduce_doc},
791     {NULL,              NULL}           /* sentinel */
792 };
793 
setiter_iternext(setiterobject * si)794 static PyObject *setiter_iternext(setiterobject *si)
795 {
796     PyObject *key;
797     Py_ssize_t i, mask;
798     setentry *entry;
799     PySetObject *so = si->si_set;
800 
801     if (so == NULL)
802         return NULL;
803     assert (PyAnySet_Check(so));
804 
805     if (si->si_used != so->used) {
806         PyErr_SetString(PyExc_RuntimeError,
807                         "Set changed size during iteration");
808         si->si_used = -1; /* Make this state sticky */
809         return NULL;
810     }
811 
812     i = si->si_pos;
813     assert(i>=0);
814     entry = so->table;
815     mask = so->mask;
816     while (i <= mask && (entry[i].key == NULL || entry[i].key == dummy))
817         i++;
818     si->si_pos = i+1;
819     if (i > mask)
820         goto fail;
821     si->len--;
822     key = entry[i].key;
823     Py_INCREF(key);
824     return key;
825 
826 fail:
827     si->si_set = NULL;
828     Py_DECREF(so);
829     return NULL;
830 }
831 
832 PyTypeObject PySetIter_Type = {
833     PyVarObject_HEAD_INIT(&PyType_Type, 0)
834     "set_iterator",                             /* tp_name */
835     sizeof(setiterobject),                      /* tp_basicsize */
836     0,                                          /* tp_itemsize */
837     /* methods */
838     (destructor)setiter_dealloc,                /* tp_dealloc */
839     0,                                          /* tp_vectorcall_offset */
840     0,                                          /* tp_getattr */
841     0,                                          /* tp_setattr */
842     0,                                          /* tp_as_async */
843     0,                                          /* tp_repr */
844     0,                                          /* tp_as_number */
845     0,                                          /* tp_as_sequence */
846     0,                                          /* tp_as_mapping */
847     0,                                          /* tp_hash */
848     0,                                          /* tp_call */
849     0,                                          /* tp_str */
850     PyObject_GenericGetAttr,                    /* tp_getattro */
851     0,                                          /* tp_setattro */
852     0,                                          /* tp_as_buffer */
853     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
854     0,                                          /* tp_doc */
855     (traverseproc)setiter_traverse,             /* tp_traverse */
856     0,                                          /* tp_clear */
857     0,                                          /* tp_richcompare */
858     0,                                          /* tp_weaklistoffset */
859     PyObject_SelfIter,                          /* tp_iter */
860     (iternextfunc)setiter_iternext,             /* tp_iternext */
861     setiter_methods,                            /* tp_methods */
862     0,
863 };
864 
865 static PyObject *
set_iter(PySetObject * so)866 set_iter(PySetObject *so)
867 {
868     setiterobject *si = PyObject_GC_New(setiterobject, &PySetIter_Type);
869     if (si == NULL)
870         return NULL;
871     Py_INCREF(so);
872     si->si_set = so;
873     si->si_used = so->used;
874     si->si_pos = 0;
875     si->len = so->used;
876     _PyObject_GC_TRACK(si);
877     return (PyObject *)si;
878 }
879 
880 static int
set_update_internal(PySetObject * so,PyObject * other)881 set_update_internal(PySetObject *so, PyObject *other)
882 {
883     PyObject *key, *it;
884 
885     if (PyAnySet_Check(other))
886         return set_merge(so, other);
887 
888     if (PyDict_CheckExact(other)) {
889         PyObject *value;
890         Py_ssize_t pos = 0;
891         Py_hash_t hash;
892         Py_ssize_t dictsize = PyDict_GET_SIZE(other);
893 
894         /* Do one big resize at the start, rather than
895         * incrementally resizing as we insert new keys.  Expect
896         * that there will be no (or few) overlapping keys.
897         */
898         if (dictsize < 0)
899             return -1;
900         if ((so->fill + dictsize)*5 >= so->mask*3) {
901             if (set_table_resize(so, (so->used + dictsize)*2) != 0)
902                 return -1;
903         }
904         while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
905             if (set_add_entry(so, key, hash))
906                 return -1;
907         }
908         return 0;
909     }
910 
911     it = PyObject_GetIter(other);
912     if (it == NULL)
913         return -1;
914 
915     while ((key = PyIter_Next(it)) != NULL) {
916         if (set_add_key(so, key)) {
917             Py_DECREF(it);
918             Py_DECREF(key);
919             return -1;
920         }
921         Py_DECREF(key);
922     }
923     Py_DECREF(it);
924     if (PyErr_Occurred())
925         return -1;
926     return 0;
927 }
928 
929 static PyObject *
set_update(PySetObject * so,PyObject * args)930 set_update(PySetObject *so, PyObject *args)
931 {
932     Py_ssize_t i;
933 
934     for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
935         PyObject *other = PyTuple_GET_ITEM(args, i);
936         if (set_update_internal(so, other))
937             return NULL;
938     }
939     Py_RETURN_NONE;
940 }
941 
942 PyDoc_STRVAR(update_doc,
943 "Update a set with the union of itself and others.");
944 
945 /* XXX Todo:
946    If aligned memory allocations become available, make the
947    set object 64 byte aligned so that most of the fields
948    can be retrieved or updated in a single cache line.
949 */
950 
951 static PyObject *
make_new_set(PyTypeObject * type,PyObject * iterable)952 make_new_set(PyTypeObject *type, PyObject *iterable)
953 {
954     assert(PyType_Check(type));
955     PySetObject *so;
956 
957     so = (PySetObject *)type->tp_alloc(type, 0);
958     if (so == NULL)
959         return NULL;
960 
961     so->fill = 0;
962     so->used = 0;
963     so->mask = PySet_MINSIZE - 1;
964     so->table = so->smalltable;
965     so->hash = -1;
966     so->finger = 0;
967     so->weakreflist = NULL;
968 
969     if (iterable != NULL) {
970         if (set_update_internal(so, iterable)) {
971             Py_DECREF(so);
972             return NULL;
973         }
974     }
975 
976     return (PyObject *)so;
977 }
978 
979 static PyObject *
make_new_set_basetype(PyTypeObject * type,PyObject * iterable)980 make_new_set_basetype(PyTypeObject *type, PyObject *iterable)
981 {
982     if (type != &PySet_Type && type != &PyFrozenSet_Type) {
983         if (PyType_IsSubtype(type, &PySet_Type))
984             type = &PySet_Type;
985         else
986             type = &PyFrozenSet_Type;
987     }
988     return make_new_set(type, iterable);
989 }
990 
991 static PyObject *
make_new_frozenset(PyTypeObject * type,PyObject * iterable)992 make_new_frozenset(PyTypeObject *type, PyObject *iterable)
993 {
994     if (type != &PyFrozenSet_Type) {
995         return make_new_set(type, iterable);
996     }
997 
998     if (iterable != NULL && PyFrozenSet_CheckExact(iterable)) {
999         /* frozenset(f) is idempotent */
1000         Py_INCREF(iterable);
1001         return iterable;
1002     }
1003     return make_new_set(type, iterable);
1004 }
1005 
1006 static PyObject *
frozenset_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1007 frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1008 {
1009     PyObject *iterable = NULL;
1010 
1011     if ((type == &PyFrozenSet_Type ||
1012          type->tp_init == PyFrozenSet_Type.tp_init) &&
1013         !_PyArg_NoKeywords("frozenset", kwds)) {
1014         return NULL;
1015     }
1016 
1017     if (!PyArg_UnpackTuple(args, type->tp_name, 0, 1, &iterable)) {
1018         return NULL;
1019     }
1020 
1021     return make_new_frozenset(type, iterable);
1022 }
1023 
1024 static PyObject *
frozenset_vectorcall(PyObject * type,PyObject * const * args,size_t nargsf,PyObject * kwnames)1025 frozenset_vectorcall(PyObject *type, PyObject * const*args,
1026                      size_t nargsf, PyObject *kwnames)
1027 {
1028     if (!_PyArg_NoKwnames("frozenset", kwnames)) {
1029         return NULL;
1030     }
1031 
1032     Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
1033     if (!_PyArg_CheckPositional("frozenset", nargs, 0, 1)) {
1034         return NULL;
1035     }
1036 
1037     PyObject *iterable = (nargs ? args[0] : NULL);
1038     return make_new_frozenset(_PyType_CAST(type), iterable);
1039 }
1040 
1041 static PyObject *
set_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1042 set_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1043 {
1044     return make_new_set(type, NULL);
1045 }
1046 
1047 /* set_swap_bodies() switches the contents of any two sets by moving their
1048    internal data pointers and, if needed, copying the internal smalltables.
1049    Semantically equivalent to:
1050 
1051      t=set(a); a.clear(); a.update(b); b.clear(); b.update(t); del t
1052 
1053    The function always succeeds and it leaves both objects in a stable state.
1054    Useful for operations that update in-place (by allowing an intermediate
1055    result to be swapped into one of the original inputs).
1056 */
1057 
1058 static void
set_swap_bodies(PySetObject * a,PySetObject * b)1059 set_swap_bodies(PySetObject *a, PySetObject *b)
1060 {
1061     Py_ssize_t t;
1062     setentry *u;
1063     setentry tab[PySet_MINSIZE];
1064     Py_hash_t h;
1065 
1066     t = a->fill;     a->fill   = b->fill;        b->fill  = t;
1067     t = a->used;     a->used   = b->used;        b->used  = t;
1068     t = a->mask;     a->mask   = b->mask;        b->mask  = t;
1069 
1070     u = a->table;
1071     if (a->table == a->smalltable)
1072         u = b->smalltable;
1073     a->table  = b->table;
1074     if (b->table == b->smalltable)
1075         a->table = a->smalltable;
1076     b->table = u;
1077 
1078     if (a->table == a->smalltable || b->table == b->smalltable) {
1079         memcpy(tab, a->smalltable, sizeof(tab));
1080         memcpy(a->smalltable, b->smalltable, sizeof(tab));
1081         memcpy(b->smalltable, tab, sizeof(tab));
1082     }
1083 
1084     if (PyType_IsSubtype(Py_TYPE(a), &PyFrozenSet_Type)  &&
1085         PyType_IsSubtype(Py_TYPE(b), &PyFrozenSet_Type)) {
1086         h = a->hash;     a->hash = b->hash;  b->hash = h;
1087     } else {
1088         a->hash = -1;
1089         b->hash = -1;
1090     }
1091 }
1092 
1093 static PyObject *
set_copy(PySetObject * so,PyObject * Py_UNUSED (ignored))1094 set_copy(PySetObject *so, PyObject *Py_UNUSED(ignored))
1095 {
1096     return make_new_set_basetype(Py_TYPE(so), (PyObject *)so);
1097 }
1098 
1099 static PyObject *
frozenset_copy(PySetObject * so,PyObject * Py_UNUSED (ignored))1100 frozenset_copy(PySetObject *so, PyObject *Py_UNUSED(ignored))
1101 {
1102     if (PyFrozenSet_CheckExact(so)) {
1103         Py_INCREF(so);
1104         return (PyObject *)so;
1105     }
1106     return set_copy(so, NULL);
1107 }
1108 
1109 PyDoc_STRVAR(copy_doc, "Return a shallow copy of a set.");
1110 
1111 static PyObject *
set_clear(PySetObject * so,PyObject * Py_UNUSED (ignored))1112 set_clear(PySetObject *so, PyObject *Py_UNUSED(ignored))
1113 {
1114     set_clear_internal(so);
1115     Py_RETURN_NONE;
1116 }
1117 
1118 PyDoc_STRVAR(clear_doc, "Remove all elements from this set.");
1119 
1120 static PyObject *
set_union(PySetObject * so,PyObject * args)1121 set_union(PySetObject *so, PyObject *args)
1122 {
1123     PySetObject *result;
1124     PyObject *other;
1125     Py_ssize_t i;
1126 
1127     result = (PySetObject *)set_copy(so, NULL);
1128     if (result == NULL)
1129         return NULL;
1130 
1131     for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1132         other = PyTuple_GET_ITEM(args, i);
1133         if ((PyObject *)so == other)
1134             continue;
1135         if (set_update_internal(result, other)) {
1136             Py_DECREF(result);
1137             return NULL;
1138         }
1139     }
1140     return (PyObject *)result;
1141 }
1142 
1143 PyDoc_STRVAR(union_doc,
1144  "Return the union of sets as a new set.\n\
1145 \n\
1146 (i.e. all elements that are in either set.)");
1147 
1148 static PyObject *
set_or(PySetObject * so,PyObject * other)1149 set_or(PySetObject *so, PyObject *other)
1150 {
1151     PySetObject *result;
1152 
1153     if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1154         Py_RETURN_NOTIMPLEMENTED;
1155 
1156     result = (PySetObject *)set_copy(so, NULL);
1157     if (result == NULL)
1158         return NULL;
1159     if ((PyObject *)so == other)
1160         return (PyObject *)result;
1161     if (set_update_internal(result, other)) {
1162         Py_DECREF(result);
1163         return NULL;
1164     }
1165     return (PyObject *)result;
1166 }
1167 
1168 static PyObject *
set_ior(PySetObject * so,PyObject * other)1169 set_ior(PySetObject *so, PyObject *other)
1170 {
1171     if (!PyAnySet_Check(other))
1172         Py_RETURN_NOTIMPLEMENTED;
1173 
1174     if (set_update_internal(so, other))
1175         return NULL;
1176     Py_INCREF(so);
1177     return (PyObject *)so;
1178 }
1179 
1180 static PyObject *
set_intersection(PySetObject * so,PyObject * other)1181 set_intersection(PySetObject *so, PyObject *other)
1182 {
1183     PySetObject *result;
1184     PyObject *key, *it, *tmp;
1185     Py_hash_t hash;
1186     int rv;
1187 
1188     if ((PyObject *)so == other)
1189         return set_copy(so, NULL);
1190 
1191     result = (PySetObject *)make_new_set_basetype(Py_TYPE(so), NULL);
1192     if (result == NULL)
1193         return NULL;
1194 
1195     if (PyAnySet_Check(other)) {
1196         Py_ssize_t pos = 0;
1197         setentry *entry;
1198 
1199         if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1200             tmp = (PyObject *)so;
1201             so = (PySetObject *)other;
1202             other = tmp;
1203         }
1204 
1205         while (set_next((PySetObject *)other, &pos, &entry)) {
1206             key = entry->key;
1207             hash = entry->hash;
1208             Py_INCREF(key);
1209             rv = set_contains_entry(so, key, hash);
1210             if (rv < 0) {
1211                 Py_DECREF(result);
1212                 Py_DECREF(key);
1213                 return NULL;
1214             }
1215             if (rv) {
1216                 if (set_add_entry(result, key, hash)) {
1217                     Py_DECREF(result);
1218                     Py_DECREF(key);
1219                     return NULL;
1220                 }
1221             }
1222             Py_DECREF(key);
1223         }
1224         return (PyObject *)result;
1225     }
1226 
1227     it = PyObject_GetIter(other);
1228     if (it == NULL) {
1229         Py_DECREF(result);
1230         return NULL;
1231     }
1232 
1233     while ((key = PyIter_Next(it)) != NULL) {
1234         hash = PyObject_Hash(key);
1235         if (hash == -1)
1236             goto error;
1237         rv = set_contains_entry(so, key, hash);
1238         if (rv < 0)
1239             goto error;
1240         if (rv) {
1241             if (set_add_entry(result, key, hash))
1242                 goto error;
1243             if (PySet_GET_SIZE(result) >= PySet_GET_SIZE(so)) {
1244                 Py_DECREF(key);
1245                 break;
1246             }
1247         }
1248         Py_DECREF(key);
1249     }
1250     Py_DECREF(it);
1251     if (PyErr_Occurred()) {
1252         Py_DECREF(result);
1253         return NULL;
1254     }
1255     return (PyObject *)result;
1256   error:
1257     Py_DECREF(it);
1258     Py_DECREF(result);
1259     Py_DECREF(key);
1260     return NULL;
1261 }
1262 
1263 static PyObject *
set_intersection_multi(PySetObject * so,PyObject * args)1264 set_intersection_multi(PySetObject *so, PyObject *args)
1265 {
1266     Py_ssize_t i;
1267     PyObject *result = (PyObject *)so;
1268 
1269     if (PyTuple_GET_SIZE(args) == 0)
1270         return set_copy(so, NULL);
1271 
1272     Py_INCREF(so);
1273     for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1274         PyObject *other = PyTuple_GET_ITEM(args, i);
1275         PyObject *newresult = set_intersection((PySetObject *)result, other);
1276         if (newresult == NULL) {
1277             Py_DECREF(result);
1278             return NULL;
1279         }
1280         Py_DECREF(result);
1281         result = newresult;
1282     }
1283     return result;
1284 }
1285 
1286 PyDoc_STRVAR(intersection_doc,
1287 "Return the intersection of two sets as a new set.\n\
1288 \n\
1289 (i.e. all elements that are in both sets.)");
1290 
1291 static PyObject *
set_intersection_update(PySetObject * so,PyObject * other)1292 set_intersection_update(PySetObject *so, PyObject *other)
1293 {
1294     PyObject *tmp;
1295 
1296     tmp = set_intersection(so, other);
1297     if (tmp == NULL)
1298         return NULL;
1299     set_swap_bodies(so, (PySetObject *)tmp);
1300     Py_DECREF(tmp);
1301     Py_RETURN_NONE;
1302 }
1303 
1304 static PyObject *
set_intersection_update_multi(PySetObject * so,PyObject * args)1305 set_intersection_update_multi(PySetObject *so, PyObject *args)
1306 {
1307     PyObject *tmp;
1308 
1309     tmp = set_intersection_multi(so, args);
1310     if (tmp == NULL)
1311         return NULL;
1312     set_swap_bodies(so, (PySetObject *)tmp);
1313     Py_DECREF(tmp);
1314     Py_RETURN_NONE;
1315 }
1316 
1317 PyDoc_STRVAR(intersection_update_doc,
1318 "Update a set with the intersection of itself and another.");
1319 
1320 static PyObject *
set_and(PySetObject * so,PyObject * other)1321 set_and(PySetObject *so, PyObject *other)
1322 {
1323     if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1324         Py_RETURN_NOTIMPLEMENTED;
1325     return set_intersection(so, other);
1326 }
1327 
1328 static PyObject *
set_iand(PySetObject * so,PyObject * other)1329 set_iand(PySetObject *so, PyObject *other)
1330 {
1331     PyObject *result;
1332 
1333     if (!PyAnySet_Check(other))
1334         Py_RETURN_NOTIMPLEMENTED;
1335     result = set_intersection_update(so, other);
1336     if (result == NULL)
1337         return NULL;
1338     Py_DECREF(result);
1339     Py_INCREF(so);
1340     return (PyObject *)so;
1341 }
1342 
1343 static PyObject *
set_isdisjoint(PySetObject * so,PyObject * other)1344 set_isdisjoint(PySetObject *so, PyObject *other)
1345 {
1346     PyObject *key, *it, *tmp;
1347     int rv;
1348 
1349     if ((PyObject *)so == other) {
1350         if (PySet_GET_SIZE(so) == 0)
1351             Py_RETURN_TRUE;
1352         else
1353             Py_RETURN_FALSE;
1354     }
1355 
1356     if (PyAnySet_CheckExact(other)) {
1357         Py_ssize_t pos = 0;
1358         setentry *entry;
1359 
1360         if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1361             tmp = (PyObject *)so;
1362             so = (PySetObject *)other;
1363             other = tmp;
1364         }
1365         while (set_next((PySetObject *)other, &pos, &entry)) {
1366             PyObject *key = entry->key;
1367             Py_INCREF(key);
1368             rv = set_contains_entry(so, key, entry->hash);
1369             Py_DECREF(key);
1370             if (rv < 0) {
1371                 return NULL;
1372             }
1373             if (rv) {
1374                 Py_RETURN_FALSE;
1375             }
1376         }
1377         Py_RETURN_TRUE;
1378     }
1379 
1380     it = PyObject_GetIter(other);
1381     if (it == NULL)
1382         return NULL;
1383 
1384     while ((key = PyIter_Next(it)) != NULL) {
1385         rv = set_contains_key(so, key);
1386         Py_DECREF(key);
1387         if (rv < 0) {
1388             Py_DECREF(it);
1389             return NULL;
1390         }
1391         if (rv) {
1392             Py_DECREF(it);
1393             Py_RETURN_FALSE;
1394         }
1395     }
1396     Py_DECREF(it);
1397     if (PyErr_Occurred())
1398         return NULL;
1399     Py_RETURN_TRUE;
1400 }
1401 
1402 PyDoc_STRVAR(isdisjoint_doc,
1403 "Return True if two sets have a null intersection.");
1404 
1405 static int
set_difference_update_internal(PySetObject * so,PyObject * other)1406 set_difference_update_internal(PySetObject *so, PyObject *other)
1407 {
1408     if ((PyObject *)so == other)
1409         return set_clear_internal(so);
1410 
1411     if (PyAnySet_Check(other)) {
1412         setentry *entry;
1413         Py_ssize_t pos = 0;
1414 
1415         /* Optimization:  When the other set is more than 8 times
1416            larger than the base set, replace the other set with
1417            intersection of the two sets.
1418         */
1419         if ((PySet_GET_SIZE(other) >> 3) > PySet_GET_SIZE(so)) {
1420             other = set_intersection(so, other);
1421             if (other == NULL)
1422                 return -1;
1423         } else {
1424             Py_INCREF(other);
1425         }
1426 
1427         while (set_next((PySetObject *)other, &pos, &entry)) {
1428             PyObject *key = entry->key;
1429             Py_INCREF(key);
1430             if (set_discard_entry(so, key, entry->hash) < 0) {
1431                 Py_DECREF(other);
1432                 Py_DECREF(key);
1433                 return -1;
1434             }
1435             Py_DECREF(key);
1436         }
1437 
1438         Py_DECREF(other);
1439     } else {
1440         PyObject *key, *it;
1441         it = PyObject_GetIter(other);
1442         if (it == NULL)
1443             return -1;
1444 
1445         while ((key = PyIter_Next(it)) != NULL) {
1446             if (set_discard_key(so, key) < 0) {
1447                 Py_DECREF(it);
1448                 Py_DECREF(key);
1449                 return -1;
1450             }
1451             Py_DECREF(key);
1452         }
1453         Py_DECREF(it);
1454         if (PyErr_Occurred())
1455             return -1;
1456     }
1457     /* If more than 1/4th are dummies, then resize them away. */
1458     if ((size_t)(so->fill - so->used) <= (size_t)so->mask / 4)
1459         return 0;
1460     return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
1461 }
1462 
1463 static PyObject *
set_difference_update(PySetObject * so,PyObject * args)1464 set_difference_update(PySetObject *so, PyObject *args)
1465 {
1466     Py_ssize_t i;
1467 
1468     for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1469         PyObject *other = PyTuple_GET_ITEM(args, i);
1470         if (set_difference_update_internal(so, other))
1471             return NULL;
1472     }
1473     Py_RETURN_NONE;
1474 }
1475 
1476 PyDoc_STRVAR(difference_update_doc,
1477 "Remove all elements of another set from this set.");
1478 
1479 static PyObject *
set_copy_and_difference(PySetObject * so,PyObject * other)1480 set_copy_and_difference(PySetObject *so, PyObject *other)
1481 {
1482     PyObject *result;
1483 
1484     result = set_copy(so, NULL);
1485     if (result == NULL)
1486         return NULL;
1487     if (set_difference_update_internal((PySetObject *) result, other) == 0)
1488         return result;
1489     Py_DECREF(result);
1490     return NULL;
1491 }
1492 
1493 static PyObject *
set_difference(PySetObject * so,PyObject * other)1494 set_difference(PySetObject *so, PyObject *other)
1495 {
1496     PyObject *result;
1497     PyObject *key;
1498     Py_hash_t hash;
1499     setentry *entry;
1500     Py_ssize_t pos = 0, other_size;
1501     int rv;
1502 
1503     if (PyAnySet_Check(other)) {
1504         other_size = PySet_GET_SIZE(other);
1505     }
1506     else if (PyDict_CheckExact(other)) {
1507         other_size = PyDict_GET_SIZE(other);
1508     }
1509     else {
1510         return set_copy_and_difference(so, other);
1511     }
1512 
1513     /* If len(so) much more than len(other), it's more efficient to simply copy
1514      * so and then iterate other looking for common elements. */
1515     if ((PySet_GET_SIZE(so) >> 2) > other_size) {
1516         return set_copy_and_difference(so, other);
1517     }
1518 
1519     result = make_new_set_basetype(Py_TYPE(so), NULL);
1520     if (result == NULL)
1521         return NULL;
1522 
1523     if (PyDict_CheckExact(other)) {
1524         while (set_next(so, &pos, &entry)) {
1525             key = entry->key;
1526             hash = entry->hash;
1527             Py_INCREF(key);
1528             rv = _PyDict_Contains_KnownHash(other, key, hash);
1529             if (rv < 0) {
1530                 Py_DECREF(result);
1531                 Py_DECREF(key);
1532                 return NULL;
1533             }
1534             if (!rv) {
1535                 if (set_add_entry((PySetObject *)result, key, hash)) {
1536                     Py_DECREF(result);
1537                     Py_DECREF(key);
1538                     return NULL;
1539                 }
1540             }
1541             Py_DECREF(key);
1542         }
1543         return result;
1544     }
1545 
1546     /* Iterate over so, checking for common elements in other. */
1547     while (set_next(so, &pos, &entry)) {
1548         key = entry->key;
1549         hash = entry->hash;
1550         Py_INCREF(key);
1551         rv = set_contains_entry((PySetObject *)other, key, hash);
1552         if (rv < 0) {
1553             Py_DECREF(result);
1554             Py_DECREF(key);
1555             return NULL;
1556         }
1557         if (!rv) {
1558             if (set_add_entry((PySetObject *)result, key, hash)) {
1559                 Py_DECREF(result);
1560                 Py_DECREF(key);
1561                 return NULL;
1562             }
1563         }
1564         Py_DECREF(key);
1565     }
1566     return result;
1567 }
1568 
1569 static PyObject *
set_difference_multi(PySetObject * so,PyObject * args)1570 set_difference_multi(PySetObject *so, PyObject *args)
1571 {
1572     Py_ssize_t i;
1573     PyObject *result, *other;
1574 
1575     if (PyTuple_GET_SIZE(args) == 0)
1576         return set_copy(so, NULL);
1577 
1578     other = PyTuple_GET_ITEM(args, 0);
1579     result = set_difference(so, other);
1580     if (result == NULL)
1581         return NULL;
1582 
1583     for (i=1 ; i<PyTuple_GET_SIZE(args) ; i++) {
1584         other = PyTuple_GET_ITEM(args, i);
1585         if (set_difference_update_internal((PySetObject *)result, other)) {
1586             Py_DECREF(result);
1587             return NULL;
1588         }
1589     }
1590     return result;
1591 }
1592 
1593 PyDoc_STRVAR(difference_doc,
1594 "Return the difference of two or more sets as a new set.\n\
1595 \n\
1596 (i.e. all elements that are in this set but not the others.)");
1597 static PyObject *
set_sub(PySetObject * so,PyObject * other)1598 set_sub(PySetObject *so, PyObject *other)
1599 {
1600     if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1601         Py_RETURN_NOTIMPLEMENTED;
1602     return set_difference(so, other);
1603 }
1604 
1605 static PyObject *
set_isub(PySetObject * so,PyObject * other)1606 set_isub(PySetObject *so, PyObject *other)
1607 {
1608     if (!PyAnySet_Check(other))
1609         Py_RETURN_NOTIMPLEMENTED;
1610     if (set_difference_update_internal(so, other))
1611         return NULL;
1612     Py_INCREF(so);
1613     return (PyObject *)so;
1614 }
1615 
1616 static PyObject *
set_symmetric_difference_update(PySetObject * so,PyObject * other)1617 set_symmetric_difference_update(PySetObject *so, PyObject *other)
1618 {
1619     PySetObject *otherset;
1620     PyObject *key;
1621     Py_ssize_t pos = 0;
1622     Py_hash_t hash;
1623     setentry *entry;
1624     int rv;
1625 
1626     if ((PyObject *)so == other)
1627         return set_clear(so, NULL);
1628 
1629     if (PyDict_CheckExact(other)) {
1630         PyObject *value;
1631         while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
1632             Py_INCREF(key);
1633             rv = set_discard_entry(so, key, hash);
1634             if (rv < 0) {
1635                 Py_DECREF(key);
1636                 return NULL;
1637             }
1638             if (rv == DISCARD_NOTFOUND) {
1639                 if (set_add_entry(so, key, hash)) {
1640                     Py_DECREF(key);
1641                     return NULL;
1642                 }
1643             }
1644             Py_DECREF(key);
1645         }
1646         Py_RETURN_NONE;
1647     }
1648 
1649     if (PyAnySet_Check(other)) {
1650         Py_INCREF(other);
1651         otherset = (PySetObject *)other;
1652     } else {
1653         otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other);
1654         if (otherset == NULL)
1655             return NULL;
1656     }
1657 
1658     while (set_next(otherset, &pos, &entry)) {
1659         key = entry->key;
1660         hash = entry->hash;
1661         Py_INCREF(key);
1662         rv = set_discard_entry(so, key, hash);
1663         if (rv < 0) {
1664             Py_DECREF(otherset);
1665             Py_DECREF(key);
1666             return NULL;
1667         }
1668         if (rv == DISCARD_NOTFOUND) {
1669             if (set_add_entry(so, key, hash)) {
1670                 Py_DECREF(otherset);
1671                 Py_DECREF(key);
1672                 return NULL;
1673             }
1674         }
1675         Py_DECREF(key);
1676     }
1677     Py_DECREF(otherset);
1678     Py_RETURN_NONE;
1679 }
1680 
1681 PyDoc_STRVAR(symmetric_difference_update_doc,
1682 "Update a set with the symmetric difference of itself and another.");
1683 
1684 static PyObject *
set_symmetric_difference(PySetObject * so,PyObject * other)1685 set_symmetric_difference(PySetObject *so, PyObject *other)
1686 {
1687     PyObject *rv;
1688     PySetObject *otherset;
1689 
1690     otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other);
1691     if (otherset == NULL)
1692         return NULL;
1693     rv = set_symmetric_difference_update(otherset, (PyObject *)so);
1694     if (rv == NULL) {
1695         Py_DECREF(otherset);
1696         return NULL;
1697     }
1698     Py_DECREF(rv);
1699     return (PyObject *)otherset;
1700 }
1701 
1702 PyDoc_STRVAR(symmetric_difference_doc,
1703 "Return the symmetric difference of two sets as a new set.\n\
1704 \n\
1705 (i.e. all elements that are in exactly one of the sets.)");
1706 
1707 static PyObject *
set_xor(PySetObject * so,PyObject * other)1708 set_xor(PySetObject *so, PyObject *other)
1709 {
1710     if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1711         Py_RETURN_NOTIMPLEMENTED;
1712     return set_symmetric_difference(so, other);
1713 }
1714 
1715 static PyObject *
set_ixor(PySetObject * so,PyObject * other)1716 set_ixor(PySetObject *so, PyObject *other)
1717 {
1718     PyObject *result;
1719 
1720     if (!PyAnySet_Check(other))
1721         Py_RETURN_NOTIMPLEMENTED;
1722     result = set_symmetric_difference_update(so, other);
1723     if (result == NULL)
1724         return NULL;
1725     Py_DECREF(result);
1726     Py_INCREF(so);
1727     return (PyObject *)so;
1728 }
1729 
1730 static PyObject *
set_issubset(PySetObject * so,PyObject * other)1731 set_issubset(PySetObject *so, PyObject *other)
1732 {
1733     setentry *entry;
1734     Py_ssize_t pos = 0;
1735     int rv;
1736 
1737     if (!PyAnySet_Check(other)) {
1738         PyObject *tmp, *result;
1739         tmp = make_new_set(&PySet_Type, other);
1740         if (tmp == NULL)
1741             return NULL;
1742         result = set_issubset(so, tmp);
1743         Py_DECREF(tmp);
1744         return result;
1745     }
1746     if (PySet_GET_SIZE(so) > PySet_GET_SIZE(other))
1747         Py_RETURN_FALSE;
1748 
1749     while (set_next(so, &pos, &entry)) {
1750         PyObject *key = entry->key;
1751         Py_INCREF(key);
1752         rv = set_contains_entry((PySetObject *)other, key, entry->hash);
1753         Py_DECREF(key);
1754         if (rv < 0) {
1755             return NULL;
1756         }
1757         if (!rv) {
1758             Py_RETURN_FALSE;
1759         }
1760     }
1761     Py_RETURN_TRUE;
1762 }
1763 
1764 PyDoc_STRVAR(issubset_doc, "Report whether another set contains this set.");
1765 
1766 static PyObject *
set_issuperset(PySetObject * so,PyObject * other)1767 set_issuperset(PySetObject *so, PyObject *other)
1768 {
1769     if (PyAnySet_Check(other)) {
1770         return set_issubset((PySetObject *)other, (PyObject *)so);
1771     }
1772 
1773     PyObject *key, *it = PyObject_GetIter(other);
1774     if (it == NULL) {
1775         return NULL;
1776     }
1777     while ((key = PyIter_Next(it)) != NULL) {
1778         int rv = set_contains_key(so, key);
1779         Py_DECREF(key);
1780         if (rv < 0) {
1781             Py_DECREF(it);
1782             return NULL;
1783         }
1784         if (!rv) {
1785             Py_DECREF(it);
1786             Py_RETURN_FALSE;
1787         }
1788     }
1789     Py_DECREF(it);
1790     if (PyErr_Occurred()) {
1791         return NULL;
1792     }
1793     Py_RETURN_TRUE;
1794 }
1795 
1796 PyDoc_STRVAR(issuperset_doc, "Report whether this set contains another set.");
1797 
1798 static PyObject *
set_richcompare(PySetObject * v,PyObject * w,int op)1799 set_richcompare(PySetObject *v, PyObject *w, int op)
1800 {
1801     PyObject *r1;
1802     int r2;
1803 
1804     if(!PyAnySet_Check(w))
1805         Py_RETURN_NOTIMPLEMENTED;
1806 
1807     switch (op) {
1808     case Py_EQ:
1809         if (PySet_GET_SIZE(v) != PySet_GET_SIZE(w))
1810             Py_RETURN_FALSE;
1811         if (v->hash != -1  &&
1812             ((PySetObject *)w)->hash != -1 &&
1813             v->hash != ((PySetObject *)w)->hash)
1814             Py_RETURN_FALSE;
1815         return set_issubset(v, w);
1816     case Py_NE:
1817         r1 = set_richcompare(v, w, Py_EQ);
1818         if (r1 == NULL)
1819             return NULL;
1820         r2 = PyObject_IsTrue(r1);
1821         Py_DECREF(r1);
1822         if (r2 < 0)
1823             return NULL;
1824         return PyBool_FromLong(!r2);
1825     case Py_LE:
1826         return set_issubset(v, w);
1827     case Py_GE:
1828         return set_issuperset(v, w);
1829     case Py_LT:
1830         if (PySet_GET_SIZE(v) >= PySet_GET_SIZE(w))
1831             Py_RETURN_FALSE;
1832         return set_issubset(v, w);
1833     case Py_GT:
1834         if (PySet_GET_SIZE(v) <= PySet_GET_SIZE(w))
1835             Py_RETURN_FALSE;
1836         return set_issuperset(v, w);
1837     }
1838     Py_RETURN_NOTIMPLEMENTED;
1839 }
1840 
1841 static PyObject *
set_add(PySetObject * so,PyObject * key)1842 set_add(PySetObject *so, PyObject *key)
1843 {
1844     if (set_add_key(so, key))
1845         return NULL;
1846     Py_RETURN_NONE;
1847 }
1848 
1849 PyDoc_STRVAR(add_doc,
1850 "Add an element to a set.\n\
1851 \n\
1852 This has no effect if the element is already present.");
1853 
1854 static int
set_contains(PySetObject * so,PyObject * key)1855 set_contains(PySetObject *so, PyObject *key)
1856 {
1857     PyObject *tmpkey;
1858     int rv;
1859 
1860     rv = set_contains_key(so, key);
1861     if (rv < 0) {
1862         if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1863             return -1;
1864         PyErr_Clear();
1865         tmpkey = make_new_set(&PyFrozenSet_Type, key);
1866         if (tmpkey == NULL)
1867             return -1;
1868         rv = set_contains_key(so, tmpkey);
1869         Py_DECREF(tmpkey);
1870     }
1871     return rv;
1872 }
1873 
1874 static PyObject *
set_direct_contains(PySetObject * so,PyObject * key)1875 set_direct_contains(PySetObject *so, PyObject *key)
1876 {
1877     long result;
1878 
1879     result = set_contains(so, key);
1880     if (result < 0)
1881         return NULL;
1882     return PyBool_FromLong(result);
1883 }
1884 
1885 PyDoc_STRVAR(contains_doc, "x.__contains__(y) <==> y in x.");
1886 
1887 static PyObject *
set_remove(PySetObject * so,PyObject * key)1888 set_remove(PySetObject *so, PyObject *key)
1889 {
1890     PyObject *tmpkey;
1891     int rv;
1892 
1893     rv = set_discard_key(so, key);
1894     if (rv < 0) {
1895         if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1896             return NULL;
1897         PyErr_Clear();
1898         tmpkey = make_new_set(&PyFrozenSet_Type, key);
1899         if (tmpkey == NULL)
1900             return NULL;
1901         rv = set_discard_key(so, tmpkey);
1902         Py_DECREF(tmpkey);
1903         if (rv < 0)
1904             return NULL;
1905     }
1906 
1907     if (rv == DISCARD_NOTFOUND) {
1908         _PyErr_SetKeyError(key);
1909         return NULL;
1910     }
1911     Py_RETURN_NONE;
1912 }
1913 
1914 PyDoc_STRVAR(remove_doc,
1915 "Remove an element from a set; it must be a member.\n\
1916 \n\
1917 If the element is not a member, raise a KeyError.");
1918 
1919 static PyObject *
set_discard(PySetObject * so,PyObject * key)1920 set_discard(PySetObject *so, PyObject *key)
1921 {
1922     PyObject *tmpkey;
1923     int rv;
1924 
1925     rv = set_discard_key(so, key);
1926     if (rv < 0) {
1927         if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1928             return NULL;
1929         PyErr_Clear();
1930         tmpkey = make_new_set(&PyFrozenSet_Type, key);
1931         if (tmpkey == NULL)
1932             return NULL;
1933         rv = set_discard_key(so, tmpkey);
1934         Py_DECREF(tmpkey);
1935         if (rv < 0)
1936             return NULL;
1937     }
1938     Py_RETURN_NONE;
1939 }
1940 
1941 PyDoc_STRVAR(discard_doc,
1942 "Remove an element from a set if it is a member.\n\
1943 \n\
1944 Unlike set.remove(), the discard() method does not raise\n\
1945 an exception when an element is missing from the set.");
1946 
1947 static PyObject *
set_reduce(PySetObject * so,PyObject * Py_UNUSED (ignored))1948 set_reduce(PySetObject *so, PyObject *Py_UNUSED(ignored))
1949 {
1950     PyObject *keys=NULL, *args=NULL, *result=NULL, *state=NULL;
1951 
1952     keys = PySequence_List((PyObject *)so);
1953     if (keys == NULL)
1954         goto done;
1955     args = PyTuple_Pack(1, keys);
1956     if (args == NULL)
1957         goto done;
1958     state = _PyObject_GetState((PyObject *)so);
1959     if (state == NULL)
1960         goto done;
1961     result = PyTuple_Pack(3, Py_TYPE(so), args, state);
1962 done:
1963     Py_XDECREF(args);
1964     Py_XDECREF(keys);
1965     Py_XDECREF(state);
1966     return result;
1967 }
1968 
1969 static PyObject *
set_sizeof(PySetObject * so,PyObject * Py_UNUSED (ignored))1970 set_sizeof(PySetObject *so, PyObject *Py_UNUSED(ignored))
1971 {
1972     Py_ssize_t res;
1973 
1974     res = _PyObject_SIZE(Py_TYPE(so));
1975     if (so->table != so->smalltable)
1976         res = res + (so->mask + 1) * sizeof(setentry);
1977     return PyLong_FromSsize_t(res);
1978 }
1979 
1980 PyDoc_STRVAR(sizeof_doc, "S.__sizeof__() -> size of S in memory, in bytes");
1981 static int
set_init(PySetObject * self,PyObject * args,PyObject * kwds)1982 set_init(PySetObject *self, PyObject *args, PyObject *kwds)
1983 {
1984     PyObject *iterable = NULL;
1985 
1986      if (!_PyArg_NoKeywords("set", kwds))
1987         return -1;
1988     if (!PyArg_UnpackTuple(args, Py_TYPE(self)->tp_name, 0, 1, &iterable))
1989         return -1;
1990     if (self->fill)
1991         set_clear_internal(self);
1992     self->hash = -1;
1993     if (iterable == NULL)
1994         return 0;
1995     return set_update_internal(self, iterable);
1996 }
1997 
1998 static PyObject*
set_vectorcall(PyObject * type,PyObject * const * args,size_t nargsf,PyObject * kwnames)1999 set_vectorcall(PyObject *type, PyObject * const*args,
2000                size_t nargsf, PyObject *kwnames)
2001 {
2002     assert(PyType_Check(type));
2003 
2004     if (!_PyArg_NoKwnames("set", kwnames)) {
2005         return NULL;
2006     }
2007 
2008     Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
2009     if (!_PyArg_CheckPositional("set", nargs, 0, 1)) {
2010         return NULL;
2011     }
2012 
2013     if (nargs) {
2014         return make_new_set(_PyType_CAST(type), args[0]);
2015     }
2016 
2017     return make_new_set(_PyType_CAST(type), NULL);
2018 }
2019 
2020 static PySequenceMethods set_as_sequence = {
2021     set_len,                            /* sq_length */
2022     0,                                  /* sq_concat */
2023     0,                                  /* sq_repeat */
2024     0,                                  /* sq_item */
2025     0,                                  /* sq_slice */
2026     0,                                  /* sq_ass_item */
2027     0,                                  /* sq_ass_slice */
2028     (objobjproc)set_contains,           /* sq_contains */
2029 };
2030 
2031 /* set object ********************************************************/
2032 
2033 #ifdef Py_DEBUG
2034 static PyObject *test_c_api(PySetObject *so, PyObject *Py_UNUSED(ignored));
2035 
2036 PyDoc_STRVAR(test_c_api_doc, "Exercises C API.  Returns True.\n\
2037 All is well if assertions don't fail.");
2038 #endif
2039 
2040 static PyMethodDef set_methods[] = {
2041     {"add",             (PyCFunction)set_add,           METH_O,
2042      add_doc},
2043     {"clear",           (PyCFunction)set_clear,         METH_NOARGS,
2044      clear_doc},
2045     {"__contains__",(PyCFunction)set_direct_contains,           METH_O | METH_COEXIST,
2046      contains_doc},
2047     {"copy",            (PyCFunction)set_copy,          METH_NOARGS,
2048      copy_doc},
2049     {"discard",         (PyCFunction)set_discard,       METH_O,
2050      discard_doc},
2051     {"difference",      (PyCFunction)set_difference_multi,      METH_VARARGS,
2052      difference_doc},
2053     {"difference_update",       (PyCFunction)set_difference_update,     METH_VARARGS,
2054      difference_update_doc},
2055     {"intersection",(PyCFunction)set_intersection_multi,        METH_VARARGS,
2056      intersection_doc},
2057     {"intersection_update",(PyCFunction)set_intersection_update_multi,          METH_VARARGS,
2058      intersection_update_doc},
2059     {"isdisjoint",      (PyCFunction)set_isdisjoint,    METH_O,
2060      isdisjoint_doc},
2061     {"issubset",        (PyCFunction)set_issubset,      METH_O,
2062      issubset_doc},
2063     {"issuperset",      (PyCFunction)set_issuperset,    METH_O,
2064      issuperset_doc},
2065     {"pop",             (PyCFunction)set_pop,           METH_NOARGS,
2066      pop_doc},
2067     {"__reduce__",      (PyCFunction)set_reduce,        METH_NOARGS,
2068      reduce_doc},
2069     {"remove",          (PyCFunction)set_remove,        METH_O,
2070      remove_doc},
2071     {"__sizeof__",      (PyCFunction)set_sizeof,        METH_NOARGS,
2072      sizeof_doc},
2073     {"symmetric_difference",(PyCFunction)set_symmetric_difference,      METH_O,
2074      symmetric_difference_doc},
2075     {"symmetric_difference_update",(PyCFunction)set_symmetric_difference_update,        METH_O,
2076      symmetric_difference_update_doc},
2077 #ifdef Py_DEBUG
2078     {"test_c_api",      (PyCFunction)test_c_api,        METH_NOARGS,
2079      test_c_api_doc},
2080 #endif
2081     {"union",           (PyCFunction)set_union,         METH_VARARGS,
2082      union_doc},
2083     {"update",          (PyCFunction)set_update,        METH_VARARGS,
2084      update_doc},
2085     {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")},
2086     {NULL,              NULL}   /* sentinel */
2087 };
2088 
2089 static PyNumberMethods set_as_number = {
2090     0,                                  /*nb_add*/
2091     (binaryfunc)set_sub,                /*nb_subtract*/
2092     0,                                  /*nb_multiply*/
2093     0,                                  /*nb_remainder*/
2094     0,                                  /*nb_divmod*/
2095     0,                                  /*nb_power*/
2096     0,                                  /*nb_negative*/
2097     0,                                  /*nb_positive*/
2098     0,                                  /*nb_absolute*/
2099     0,                                  /*nb_bool*/
2100     0,                                  /*nb_invert*/
2101     0,                                  /*nb_lshift*/
2102     0,                                  /*nb_rshift*/
2103     (binaryfunc)set_and,                /*nb_and*/
2104     (binaryfunc)set_xor,                /*nb_xor*/
2105     (binaryfunc)set_or,                 /*nb_or*/
2106     0,                                  /*nb_int*/
2107     0,                                  /*nb_reserved*/
2108     0,                                  /*nb_float*/
2109     0,                                  /*nb_inplace_add*/
2110     (binaryfunc)set_isub,               /*nb_inplace_subtract*/
2111     0,                                  /*nb_inplace_multiply*/
2112     0,                                  /*nb_inplace_remainder*/
2113     0,                                  /*nb_inplace_power*/
2114     0,                                  /*nb_inplace_lshift*/
2115     0,                                  /*nb_inplace_rshift*/
2116     (binaryfunc)set_iand,               /*nb_inplace_and*/
2117     (binaryfunc)set_ixor,               /*nb_inplace_xor*/
2118     (binaryfunc)set_ior,                /*nb_inplace_or*/
2119 };
2120 
2121 PyDoc_STRVAR(set_doc,
2122 "set() -> new empty set object\n\
2123 set(iterable) -> new set object\n\
2124 \n\
2125 Build an unordered collection of unique elements.");
2126 
2127 PyTypeObject PySet_Type = {
2128     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2129     "set",                              /* tp_name */
2130     sizeof(PySetObject),                /* tp_basicsize */
2131     0,                                  /* tp_itemsize */
2132     /* methods */
2133     (destructor)set_dealloc,            /* tp_dealloc */
2134     0,                                  /* tp_vectorcall_offset */
2135     0,                                  /* tp_getattr */
2136     0,                                  /* tp_setattr */
2137     0,                                  /* tp_as_async */
2138     (reprfunc)set_repr,                 /* tp_repr */
2139     &set_as_number,                     /* tp_as_number */
2140     &set_as_sequence,                   /* tp_as_sequence */
2141     0,                                  /* tp_as_mapping */
2142     PyObject_HashNotImplemented,        /* tp_hash */
2143     0,                                  /* tp_call */
2144     0,                                  /* tp_str */
2145     PyObject_GenericGetAttr,            /* tp_getattro */
2146     0,                                  /* tp_setattro */
2147     0,                                  /* tp_as_buffer */
2148     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2149         Py_TPFLAGS_BASETYPE |
2150         _Py_TPFLAGS_MATCH_SELF,       /* tp_flags */
2151     set_doc,                            /* tp_doc */
2152     (traverseproc)set_traverse,         /* tp_traverse */
2153     (inquiry)set_clear_internal,        /* tp_clear */
2154     (richcmpfunc)set_richcompare,       /* tp_richcompare */
2155     offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2156     (getiterfunc)set_iter,              /* tp_iter */
2157     0,                                  /* tp_iternext */
2158     set_methods,                        /* tp_methods */
2159     0,                                  /* tp_members */
2160     0,                                  /* tp_getset */
2161     0,                                  /* tp_base */
2162     0,                                  /* tp_dict */
2163     0,                                  /* tp_descr_get */
2164     0,                                  /* tp_descr_set */
2165     0,                                  /* tp_dictoffset */
2166     (initproc)set_init,                 /* tp_init */
2167     PyType_GenericAlloc,                /* tp_alloc */
2168     set_new,                            /* tp_new */
2169     PyObject_GC_Del,                    /* tp_free */
2170     .tp_vectorcall = set_vectorcall,
2171 };
2172 
2173 /* frozenset object ********************************************************/
2174 
2175 
2176 static PyMethodDef frozenset_methods[] = {
2177     {"__contains__",(PyCFunction)set_direct_contains,           METH_O | METH_COEXIST,
2178      contains_doc},
2179     {"copy",            (PyCFunction)frozenset_copy,    METH_NOARGS,
2180      copy_doc},
2181     {"difference",      (PyCFunction)set_difference_multi,      METH_VARARGS,
2182      difference_doc},
2183     {"intersection",    (PyCFunction)set_intersection_multi,    METH_VARARGS,
2184      intersection_doc},
2185     {"isdisjoint",      (PyCFunction)set_isdisjoint,    METH_O,
2186      isdisjoint_doc},
2187     {"issubset",        (PyCFunction)set_issubset,      METH_O,
2188      issubset_doc},
2189     {"issuperset",      (PyCFunction)set_issuperset,    METH_O,
2190      issuperset_doc},
2191     {"__reduce__",      (PyCFunction)set_reduce,        METH_NOARGS,
2192      reduce_doc},
2193     {"__sizeof__",      (PyCFunction)set_sizeof,        METH_NOARGS,
2194      sizeof_doc},
2195     {"symmetric_difference",(PyCFunction)set_symmetric_difference,      METH_O,
2196      symmetric_difference_doc},
2197     {"union",           (PyCFunction)set_union,         METH_VARARGS,
2198      union_doc},
2199     {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")},
2200     {NULL,              NULL}   /* sentinel */
2201 };
2202 
2203 static PyNumberMethods frozenset_as_number = {
2204     0,                                  /*nb_add*/
2205     (binaryfunc)set_sub,                /*nb_subtract*/
2206     0,                                  /*nb_multiply*/
2207     0,                                  /*nb_remainder*/
2208     0,                                  /*nb_divmod*/
2209     0,                                  /*nb_power*/
2210     0,                                  /*nb_negative*/
2211     0,                                  /*nb_positive*/
2212     0,                                  /*nb_absolute*/
2213     0,                                  /*nb_bool*/
2214     0,                                  /*nb_invert*/
2215     0,                                  /*nb_lshift*/
2216     0,                                  /*nb_rshift*/
2217     (binaryfunc)set_and,                /*nb_and*/
2218     (binaryfunc)set_xor,                /*nb_xor*/
2219     (binaryfunc)set_or,                 /*nb_or*/
2220 };
2221 
2222 PyDoc_STRVAR(frozenset_doc,
2223 "frozenset() -> empty frozenset object\n\
2224 frozenset(iterable) -> frozenset object\n\
2225 \n\
2226 Build an immutable unordered collection of unique elements.");
2227 
2228 PyTypeObject PyFrozenSet_Type = {
2229     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2230     "frozenset",                        /* tp_name */
2231     sizeof(PySetObject),                /* tp_basicsize */
2232     0,                                  /* tp_itemsize */
2233     /* methods */
2234     (destructor)set_dealloc,            /* tp_dealloc */
2235     0,                                  /* tp_vectorcall_offset */
2236     0,                                  /* tp_getattr */
2237     0,                                  /* tp_setattr */
2238     0,                                  /* tp_as_async */
2239     (reprfunc)set_repr,                 /* tp_repr */
2240     &frozenset_as_number,               /* tp_as_number */
2241     &set_as_sequence,                   /* tp_as_sequence */
2242     0,                                  /* tp_as_mapping */
2243     frozenset_hash,                     /* tp_hash */
2244     0,                                  /* tp_call */
2245     0,                                  /* tp_str */
2246     PyObject_GenericGetAttr,            /* tp_getattro */
2247     0,                                  /* tp_setattro */
2248     0,                                  /* tp_as_buffer */
2249     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2250         Py_TPFLAGS_BASETYPE |
2251         _Py_TPFLAGS_MATCH_SELF,       /* tp_flags */
2252     frozenset_doc,                      /* tp_doc */
2253     (traverseproc)set_traverse,         /* tp_traverse */
2254     (inquiry)set_clear_internal,        /* tp_clear */
2255     (richcmpfunc)set_richcompare,       /* tp_richcompare */
2256     offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2257     (getiterfunc)set_iter,              /* tp_iter */
2258     0,                                  /* tp_iternext */
2259     frozenset_methods,                  /* tp_methods */
2260     0,                                  /* tp_members */
2261     0,                                  /* tp_getset */
2262     0,                                  /* tp_base */
2263     0,                                  /* tp_dict */
2264     0,                                  /* tp_descr_get */
2265     0,                                  /* tp_descr_set */
2266     0,                                  /* tp_dictoffset */
2267     0,                                  /* tp_init */
2268     PyType_GenericAlloc,                /* tp_alloc */
2269     frozenset_new,                      /* tp_new */
2270     PyObject_GC_Del,                    /* tp_free */
2271     .tp_vectorcall = frozenset_vectorcall,
2272 };
2273 
2274 
2275 /***** C API functions *************************************************/
2276 
2277 PyObject *
PySet_New(PyObject * iterable)2278 PySet_New(PyObject *iterable)
2279 {
2280     return make_new_set(&PySet_Type, iterable);
2281 }
2282 
2283 PyObject *
PyFrozenSet_New(PyObject * iterable)2284 PyFrozenSet_New(PyObject *iterable)
2285 {
2286     return make_new_set(&PyFrozenSet_Type, iterable);
2287 }
2288 
2289 Py_ssize_t
PySet_Size(PyObject * anyset)2290 PySet_Size(PyObject *anyset)
2291 {
2292     if (!PyAnySet_Check(anyset)) {
2293         PyErr_BadInternalCall();
2294         return -1;
2295     }
2296     return PySet_GET_SIZE(anyset);
2297 }
2298 
2299 int
PySet_Clear(PyObject * set)2300 PySet_Clear(PyObject *set)
2301 {
2302     if (!PySet_Check(set)) {
2303         PyErr_BadInternalCall();
2304         return -1;
2305     }
2306     return set_clear_internal((PySetObject *)set);
2307 }
2308 
2309 int
PySet_Contains(PyObject * anyset,PyObject * key)2310 PySet_Contains(PyObject *anyset, PyObject *key)
2311 {
2312     if (!PyAnySet_Check(anyset)) {
2313         PyErr_BadInternalCall();
2314         return -1;
2315     }
2316     return set_contains_key((PySetObject *)anyset, key);
2317 }
2318 
2319 int
PySet_Discard(PyObject * set,PyObject * key)2320 PySet_Discard(PyObject *set, PyObject *key)
2321 {
2322     if (!PySet_Check(set)) {
2323         PyErr_BadInternalCall();
2324         return -1;
2325     }
2326     return set_discard_key((PySetObject *)set, key);
2327 }
2328 
2329 int
PySet_Add(PyObject * anyset,PyObject * key)2330 PySet_Add(PyObject *anyset, PyObject *key)
2331 {
2332     if (!PySet_Check(anyset) &&
2333         (!PyFrozenSet_Check(anyset) || Py_REFCNT(anyset) != 1)) {
2334         PyErr_BadInternalCall();
2335         return -1;
2336     }
2337     return set_add_key((PySetObject *)anyset, key);
2338 }
2339 
2340 int
_PySet_NextEntry(PyObject * set,Py_ssize_t * pos,PyObject ** key,Py_hash_t * hash)2341 _PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key, Py_hash_t *hash)
2342 {
2343     setentry *entry;
2344 
2345     if (!PyAnySet_Check(set)) {
2346         PyErr_BadInternalCall();
2347         return -1;
2348     }
2349     if (set_next((PySetObject *)set, pos, &entry) == 0)
2350         return 0;
2351     *key = entry->key;
2352     *hash = entry->hash;
2353     return 1;
2354 }
2355 
2356 PyObject *
PySet_Pop(PyObject * set)2357 PySet_Pop(PyObject *set)
2358 {
2359     if (!PySet_Check(set)) {
2360         PyErr_BadInternalCall();
2361         return NULL;
2362     }
2363     return set_pop((PySetObject *)set, NULL);
2364 }
2365 
2366 int
_PySet_Update(PyObject * set,PyObject * iterable)2367 _PySet_Update(PyObject *set, PyObject *iterable)
2368 {
2369     if (!PySet_Check(set)) {
2370         PyErr_BadInternalCall();
2371         return -1;
2372     }
2373     return set_update_internal((PySetObject *)set, iterable);
2374 }
2375 
2376 /* Exported for the gdb plugin's benefit. */
2377 PyObject *_PySet_Dummy = dummy;
2378 
2379 #ifdef Py_DEBUG
2380 
2381 /* Test code to be called with any three element set.
2382    Returns True and original set is restored. */
2383 
2384 #define assertRaises(call_return_value, exception)              \
2385     do {                                                        \
2386         assert(call_return_value);                              \
2387         assert(PyErr_ExceptionMatches(exception));              \
2388         PyErr_Clear();                                          \
2389     } while(0)
2390 
2391 static PyObject *
test_c_api(PySetObject * so,PyObject * Py_UNUSED (ignored))2392 test_c_api(PySetObject *so, PyObject *Py_UNUSED(ignored))
2393 {
2394     Py_ssize_t count;
2395     const char *s;
2396     Py_ssize_t i;
2397     PyObject *elem=NULL, *dup=NULL, *t, *f, *dup2, *x=NULL;
2398     PyObject *ob = (PyObject *)so;
2399     Py_hash_t hash;
2400     PyObject *str;
2401 
2402     /* Verify preconditions */
2403     assert(PyAnySet_Check(ob));
2404     assert(PyAnySet_CheckExact(ob));
2405     assert(!PyFrozenSet_CheckExact(ob));
2406 
2407     /* so.clear(); so |= set("abc"); */
2408     str = PyUnicode_FromString("abc");
2409     if (str == NULL)
2410         return NULL;
2411     set_clear_internal(so);
2412     if (set_update_internal(so, str)) {
2413         Py_DECREF(str);
2414         return NULL;
2415     }
2416     Py_DECREF(str);
2417 
2418     /* Exercise type/size checks */
2419     assert(PySet_Size(ob) == 3);
2420     assert(PySet_GET_SIZE(ob) == 3);
2421 
2422     /* Raise TypeError for non-iterable constructor arguments */
2423     assertRaises(PySet_New(Py_None) == NULL, PyExc_TypeError);
2424     assertRaises(PyFrozenSet_New(Py_None) == NULL, PyExc_TypeError);
2425 
2426     /* Raise TypeError for unhashable key */
2427     dup = PySet_New(ob);
2428     assertRaises(PySet_Discard(ob, dup) == -1, PyExc_TypeError);
2429     assertRaises(PySet_Contains(ob, dup) == -1, PyExc_TypeError);
2430     assertRaises(PySet_Add(ob, dup) == -1, PyExc_TypeError);
2431 
2432     /* Exercise successful pop, contains, add, and discard */
2433     elem = PySet_Pop(ob);
2434     assert(PySet_Contains(ob, elem) == 0);
2435     assert(PySet_GET_SIZE(ob) == 2);
2436     assert(PySet_Add(ob, elem) == 0);
2437     assert(PySet_Contains(ob, elem) == 1);
2438     assert(PySet_GET_SIZE(ob) == 3);
2439     assert(PySet_Discard(ob, elem) == 1);
2440     assert(PySet_GET_SIZE(ob) == 2);
2441     assert(PySet_Discard(ob, elem) == 0);
2442     assert(PySet_GET_SIZE(ob) == 2);
2443 
2444     /* Exercise clear */
2445     dup2 = PySet_New(dup);
2446     assert(PySet_Clear(dup2) == 0);
2447     assert(PySet_Size(dup2) == 0);
2448     Py_DECREF(dup2);
2449 
2450     /* Raise SystemError on clear or update of frozen set */
2451     f = PyFrozenSet_New(dup);
2452     assertRaises(PySet_Clear(f) == -1, PyExc_SystemError);
2453     assertRaises(_PySet_Update(f, dup) == -1, PyExc_SystemError);
2454     assert(PySet_Add(f, elem) == 0);
2455     Py_INCREF(f);
2456     assertRaises(PySet_Add(f, elem) == -1, PyExc_SystemError);
2457     Py_DECREF(f);
2458     Py_DECREF(f);
2459 
2460     /* Exercise direct iteration */
2461     i = 0, count = 0;
2462     while (_PySet_NextEntry((PyObject *)dup, &i, &x, &hash)) {
2463         s = PyUnicode_AsUTF8(x);
2464         assert(s && (s[0] == 'a' || s[0] == 'b' || s[0] == 'c'));
2465         count++;
2466     }
2467     assert(count == 3);
2468 
2469     /* Exercise updates */
2470     dup2 = PySet_New(NULL);
2471     assert(_PySet_Update(dup2, dup) == 0);
2472     assert(PySet_Size(dup2) == 3);
2473     assert(_PySet_Update(dup2, dup) == 0);
2474     assert(PySet_Size(dup2) == 3);
2475     Py_DECREF(dup2);
2476 
2477     /* Raise SystemError when self argument is not a set or frozenset. */
2478     t = PyTuple_New(0);
2479     assertRaises(PySet_Size(t) == -1, PyExc_SystemError);
2480     assertRaises(PySet_Contains(t, elem) == -1, PyExc_SystemError);
2481     Py_DECREF(t);
2482 
2483     /* Raise SystemError when self argument is not a set. */
2484     f = PyFrozenSet_New(dup);
2485     assert(PySet_Size(f) == 3);
2486     assert(PyFrozenSet_CheckExact(f));
2487     assertRaises(PySet_Discard(f, elem) == -1, PyExc_SystemError);
2488     assertRaises(PySet_Pop(f) == NULL, PyExc_SystemError);
2489     Py_DECREF(f);
2490 
2491     /* Raise KeyError when popping from an empty set */
2492     assert(PyNumber_InPlaceSubtract(ob, ob) == ob);
2493     Py_DECREF(ob);
2494     assert(PySet_GET_SIZE(ob) == 0);
2495     assertRaises(PySet_Pop(ob) == NULL, PyExc_KeyError);
2496 
2497     /* Restore the set from the copy using the PyNumber API */
2498     assert(PyNumber_InPlaceOr(ob, dup) == ob);
2499     Py_DECREF(ob);
2500 
2501     /* Verify constructors accept NULL arguments */
2502     f = PySet_New(NULL);
2503     assert(f != NULL);
2504     assert(PySet_GET_SIZE(f) == 0);
2505     Py_DECREF(f);
2506     f = PyFrozenSet_New(NULL);
2507     assert(f != NULL);
2508     assert(PyFrozenSet_CheckExact(f));
2509     assert(PySet_GET_SIZE(f) == 0);
2510     Py_DECREF(f);
2511 
2512     Py_DECREF(elem);
2513     Py_DECREF(dup);
2514     Py_RETURN_TRUE;
2515 }
2516 
2517 #undef assertRaises
2518 
2519 #endif
2520 
2521 /***** Dummy Struct  *************************************************/
2522 
2523 static PyObject *
dummy_repr(PyObject * op)2524 dummy_repr(PyObject *op)
2525 {
2526     return PyUnicode_FromString("<dummy key>");
2527 }
2528 
2529 static void _Py_NO_RETURN
dummy_dealloc(PyObject * ignore)2530 dummy_dealloc(PyObject* ignore)
2531 {
2532     Py_FatalError("deallocating <dummy key>");
2533 }
2534 
2535 static PyTypeObject _PySetDummy_Type = {
2536     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2537     "<dummy key> type",
2538     0,
2539     0,
2540     dummy_dealloc,      /*tp_dealloc*/ /*never called*/
2541     0,                  /*tp_vectorcall_offset*/
2542     0,                  /*tp_getattr*/
2543     0,                  /*tp_setattr*/
2544     0,                  /*tp_as_async*/
2545     dummy_repr,         /*tp_repr*/
2546     0,                  /*tp_as_number*/
2547     0,                  /*tp_as_sequence*/
2548     0,                  /*tp_as_mapping*/
2549     0,                  /*tp_hash */
2550     0,                  /*tp_call */
2551     0,                  /*tp_str */
2552     0,                  /*tp_getattro */
2553     0,                  /*tp_setattro */
2554     0,                  /*tp_as_buffer */
2555     Py_TPFLAGS_DEFAULT, /*tp_flags */
2556 };
2557 
2558 static PyObject _dummy_struct = {
2559   _PyObject_EXTRA_INIT
2560   2, &_PySetDummy_Type
2561 };
2562