1
2 /* set object implementation
3
4 Written and maintained by Raymond D. Hettinger <[email protected]>
5 Derived from Lib/sets.py and Objects/dictobject.c.
6
7 The basic lookup function used by all operations.
8 This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
9
10 The initial probe index is computed as hash mod the table size.
11 Subsequent probe indices are computed as explained in Objects/dictobject.c.
12
13 To improve cache locality, each probe inspects a series of consecutive
14 nearby entries before moving on to probes elsewhere in memory. This leaves
15 us with a hybrid of linear probing and randomized probing. The linear probing
16 reduces the cost of hash collisions because consecutive memory accesses
17 tend to be much cheaper than scattered probes. After LINEAR_PROBES steps,
18 we then use more of the upper bits from the hash value and apply a simple
19 linear congruential random number generator. This helps break-up long
20 chains of collisions.
21
22 All arithmetic on hash should ignore overflow.
23
24 Unlike the dictionary implementation, the lookkey function can return
25 NULL if the rich comparison returns an error.
26
27 Use cases for sets differ considerably from dictionaries where looked-up
28 keys are more likely to be present. In contrast, sets are primarily
29 about membership testing where the presence of an element is not known in
30 advance. Accordingly, the set implementation needs to optimize for both
31 the found and not-found case.
32 */
33
34 #include "Python.h"
35 #include "pycore_object.h" // _PyObject_GC_UNTRACK()
36 #include <stddef.h> // offsetof()
37
38 /* Object used as dummy key to fill deleted entries */
39 static PyObject _dummy_struct;
40
41 #define dummy (&_dummy_struct)
42
43
44 /* ======================================================================== */
45 /* ======= Begin logic for probing the hash table ========================= */
46
47 /* Set this to zero to turn-off linear probing */
48 #ifndef LINEAR_PROBES
49 #define LINEAR_PROBES 9
50 #endif
51
52 /* This must be >= 1 */
53 #define PERTURB_SHIFT 5
54
55 static setentry *
set_lookkey(PySetObject * so,PyObject * key,Py_hash_t hash)56 set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
57 {
58 setentry *table;
59 setentry *entry;
60 size_t perturb = hash;
61 size_t mask = so->mask;
62 size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior */
63 int probes;
64 int cmp;
65
66 while (1) {
67 entry = &so->table[i];
68 probes = (i + LINEAR_PROBES <= mask) ? LINEAR_PROBES: 0;
69 do {
70 if (entry->hash == 0 && entry->key == NULL)
71 return entry;
72 if (entry->hash == hash) {
73 PyObject *startkey = entry->key;
74 assert(startkey != dummy);
75 if (startkey == key)
76 return entry;
77 if (PyUnicode_CheckExact(startkey)
78 && PyUnicode_CheckExact(key)
79 && _PyUnicode_EQ(startkey, key))
80 return entry;
81 table = so->table;
82 Py_INCREF(startkey);
83 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
84 Py_DECREF(startkey);
85 if (cmp < 0)
86 return NULL;
87 if (table != so->table || entry->key != startkey)
88 return set_lookkey(so, key, hash);
89 if (cmp > 0)
90 return entry;
91 mask = so->mask;
92 }
93 entry++;
94 } while (probes--);
95 perturb >>= PERTURB_SHIFT;
96 i = (i * 5 + 1 + perturb) & mask;
97 }
98 }
99
100 static int set_table_resize(PySetObject *, Py_ssize_t);
101
102 static int
set_add_entry(PySetObject * so,PyObject * key,Py_hash_t hash)103 set_add_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
104 {
105 setentry *table;
106 setentry *freeslot;
107 setentry *entry;
108 size_t perturb;
109 size_t mask;
110 size_t i; /* Unsigned for defined overflow behavior */
111 int probes;
112 int cmp;
113
114 /* Pre-increment is necessary to prevent arbitrary code in the rich
115 comparison from deallocating the key just before the insertion. */
116 Py_INCREF(key);
117
118 restart:
119
120 mask = so->mask;
121 i = (size_t)hash & mask;
122 freeslot = NULL;
123 perturb = hash;
124
125 while (1) {
126 entry = &so->table[i];
127 probes = (i + LINEAR_PROBES <= mask) ? LINEAR_PROBES: 0;
128 do {
129 if (entry->hash == 0 && entry->key == NULL)
130 goto found_unused_or_dummy;
131 if (entry->hash == hash) {
132 PyObject *startkey = entry->key;
133 assert(startkey != dummy);
134 if (startkey == key)
135 goto found_active;
136 if (PyUnicode_CheckExact(startkey)
137 && PyUnicode_CheckExact(key)
138 && _PyUnicode_EQ(startkey, key))
139 goto found_active;
140 table = so->table;
141 Py_INCREF(startkey);
142 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
143 Py_DECREF(startkey);
144 if (cmp > 0)
145 goto found_active;
146 if (cmp < 0)
147 goto comparison_error;
148 if (table != so->table || entry->key != startkey)
149 goto restart;
150 mask = so->mask;
151 }
152 else if (entry->hash == -1) {
153 assert (entry->key == dummy);
154 freeslot = entry;
155 }
156 entry++;
157 } while (probes--);
158 perturb >>= PERTURB_SHIFT;
159 i = (i * 5 + 1 + perturb) & mask;
160 }
161
162 found_unused_or_dummy:
163 if (freeslot == NULL)
164 goto found_unused;
165 so->used++;
166 freeslot->key = key;
167 freeslot->hash = hash;
168 return 0;
169
170 found_unused:
171 so->fill++;
172 so->used++;
173 entry->key = key;
174 entry->hash = hash;
175 if ((size_t)so->fill*5 < mask*3)
176 return 0;
177 return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
178
179 found_active:
180 Py_DECREF(key);
181 return 0;
182
183 comparison_error:
184 Py_DECREF(key);
185 return -1;
186 }
187
188 /*
189 Internal routine used by set_table_resize() to insert an item which is
190 known to be absent from the set. Besides the performance benefit,
191 there is also safety benefit since using set_add_entry() risks making
192 a callback in the middle of a set_table_resize(), see issue 1456209.
193 The caller is responsible for updating the key's reference count and
194 the setobject's fill and used fields.
195 */
196 static void
set_insert_clean(setentry * table,size_t mask,PyObject * key,Py_hash_t hash)197 set_insert_clean(setentry *table, size_t mask, PyObject *key, Py_hash_t hash)
198 {
199 setentry *entry;
200 size_t perturb = hash;
201 size_t i = (size_t)hash & mask;
202 size_t j;
203
204 while (1) {
205 entry = &table[i];
206 if (entry->key == NULL)
207 goto found_null;
208 if (i + LINEAR_PROBES <= mask) {
209 for (j = 0; j < LINEAR_PROBES; j++) {
210 entry++;
211 if (entry->key == NULL)
212 goto found_null;
213 }
214 }
215 perturb >>= PERTURB_SHIFT;
216 i = (i * 5 + 1 + perturb) & mask;
217 }
218 found_null:
219 entry->key = key;
220 entry->hash = hash;
221 }
222
223 /* ======== End logic for probing the hash table ========================== */
224 /* ======================================================================== */
225
226 /*
227 Restructure the table by allocating a new table and reinserting all
228 keys again. When entries have been deleted, the new table may
229 actually be smaller than the old one.
230 */
231 static int
set_table_resize(PySetObject * so,Py_ssize_t minused)232 set_table_resize(PySetObject *so, Py_ssize_t minused)
233 {
234 setentry *oldtable, *newtable, *entry;
235 Py_ssize_t oldmask = so->mask;
236 size_t newmask;
237 int is_oldtable_malloced;
238 setentry small_copy[PySet_MINSIZE];
239
240 assert(minused >= 0);
241
242 /* Find the smallest table size > minused. */
243 /* XXX speed-up with intrinsics */
244 size_t newsize = PySet_MINSIZE;
245 while (newsize <= (size_t)minused) {
246 newsize <<= 1; // The largest possible value is PY_SSIZE_T_MAX + 1.
247 }
248
249 /* Get space for a new table. */
250 oldtable = so->table;
251 assert(oldtable != NULL);
252 is_oldtable_malloced = oldtable != so->smalltable;
253
254 if (newsize == PySet_MINSIZE) {
255 /* A large table is shrinking, or we can't get any smaller. */
256 newtable = so->smalltable;
257 if (newtable == oldtable) {
258 if (so->fill == so->used) {
259 /* No dummies, so no point doing anything. */
260 return 0;
261 }
262 /* We're not going to resize it, but rebuild the
263 table anyway to purge old dummy entries.
264 Subtle: This is *necessary* if fill==size,
265 as set_lookkey needs at least one virgin slot to
266 terminate failing searches. If fill < size, it's
267 merely desirable, as dummies slow searches. */
268 assert(so->fill > so->used);
269 memcpy(small_copy, oldtable, sizeof(small_copy));
270 oldtable = small_copy;
271 }
272 }
273 else {
274 newtable = PyMem_NEW(setentry, newsize);
275 if (newtable == NULL) {
276 PyErr_NoMemory();
277 return -1;
278 }
279 }
280
281 /* Make the set empty, using the new table. */
282 assert(newtable != oldtable);
283 memset(newtable, 0, sizeof(setentry) * newsize);
284 so->mask = newsize - 1;
285 so->table = newtable;
286
287 /* Copy the data over; this is refcount-neutral for active entries;
288 dummy entries aren't copied over, of course */
289 newmask = (size_t)so->mask;
290 if (so->fill == so->used) {
291 for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
292 if (entry->key != NULL) {
293 set_insert_clean(newtable, newmask, entry->key, entry->hash);
294 }
295 }
296 } else {
297 so->fill = so->used;
298 for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
299 if (entry->key != NULL && entry->key != dummy) {
300 set_insert_clean(newtable, newmask, entry->key, entry->hash);
301 }
302 }
303 }
304
305 if (is_oldtable_malloced)
306 PyMem_Free(oldtable);
307 return 0;
308 }
309
310 static int
set_contains_entry(PySetObject * so,PyObject * key,Py_hash_t hash)311 set_contains_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
312 {
313 setentry *entry;
314
315 entry = set_lookkey(so, key, hash);
316 if (entry != NULL)
317 return entry->key != NULL;
318 return -1;
319 }
320
321 #define DISCARD_NOTFOUND 0
322 #define DISCARD_FOUND 1
323
324 static int
set_discard_entry(PySetObject * so,PyObject * key,Py_hash_t hash)325 set_discard_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
326 {
327 setentry *entry;
328 PyObject *old_key;
329
330 entry = set_lookkey(so, key, hash);
331 if (entry == NULL)
332 return -1;
333 if (entry->key == NULL)
334 return DISCARD_NOTFOUND;
335 old_key = entry->key;
336 entry->key = dummy;
337 entry->hash = -1;
338 so->used--;
339 Py_DECREF(old_key);
340 return DISCARD_FOUND;
341 }
342
343 static int
set_add_key(PySetObject * so,PyObject * key)344 set_add_key(PySetObject *so, PyObject *key)
345 {
346 Py_hash_t hash;
347
348 if (!PyUnicode_CheckExact(key) ||
349 (hash = _PyASCIIObject_CAST(key)->hash) == -1) {
350 hash = PyObject_Hash(key);
351 if (hash == -1)
352 return -1;
353 }
354 return set_add_entry(so, key, hash);
355 }
356
357 static int
set_contains_key(PySetObject * so,PyObject * key)358 set_contains_key(PySetObject *so, PyObject *key)
359 {
360 Py_hash_t hash;
361
362 if (!PyUnicode_CheckExact(key) ||
363 (hash = _PyASCIIObject_CAST(key)->hash) == -1) {
364 hash = PyObject_Hash(key);
365 if (hash == -1)
366 return -1;
367 }
368 return set_contains_entry(so, key, hash);
369 }
370
371 static int
set_discard_key(PySetObject * so,PyObject * key)372 set_discard_key(PySetObject *so, PyObject *key)
373 {
374 Py_hash_t hash;
375
376 if (!PyUnicode_CheckExact(key) ||
377 (hash = _PyASCIIObject_CAST(key)->hash) == -1) {
378 hash = PyObject_Hash(key);
379 if (hash == -1)
380 return -1;
381 }
382 return set_discard_entry(so, key, hash);
383 }
384
385 static void
set_empty_to_minsize(PySetObject * so)386 set_empty_to_minsize(PySetObject *so)
387 {
388 memset(so->smalltable, 0, sizeof(so->smalltable));
389 so->fill = 0;
390 so->used = 0;
391 so->mask = PySet_MINSIZE - 1;
392 so->table = so->smalltable;
393 so->hash = -1;
394 }
395
396 static int
set_clear_internal(PySetObject * so)397 set_clear_internal(PySetObject *so)
398 {
399 setentry *entry;
400 setentry *table = so->table;
401 Py_ssize_t fill = so->fill;
402 Py_ssize_t used = so->used;
403 int table_is_malloced = table != so->smalltable;
404 setentry small_copy[PySet_MINSIZE];
405
406 assert (PyAnySet_Check(so));
407 assert(table != NULL);
408
409 /* This is delicate. During the process of clearing the set,
410 * decrefs can cause the set to mutate. To avoid fatal confusion
411 * (voice of experience), we have to make the set empty before
412 * clearing the slots, and never refer to anything via so->ref while
413 * clearing.
414 */
415 if (table_is_malloced)
416 set_empty_to_minsize(so);
417
418 else if (fill > 0) {
419 /* It's a small table with something that needs to be cleared.
420 * Afraid the only safe way is to copy the set entries into
421 * another small table first.
422 */
423 memcpy(small_copy, table, sizeof(small_copy));
424 table = small_copy;
425 set_empty_to_minsize(so);
426 }
427 /* else it's a small table that's already empty */
428
429 /* Now we can finally clear things. If C had refcounts, we could
430 * assert that the refcount on table is 1 now, i.e. that this function
431 * has unique access to it, so decref side-effects can't alter it.
432 */
433 for (entry = table; used > 0; entry++) {
434 if (entry->key && entry->key != dummy) {
435 used--;
436 Py_DECREF(entry->key);
437 }
438 }
439
440 if (table_is_malloced)
441 PyMem_Free(table);
442 return 0;
443 }
444
445 /*
446 * Iterate over a set table. Use like so:
447 *
448 * Py_ssize_t pos;
449 * setentry *entry;
450 * pos = 0; # important! pos should not otherwise be changed by you
451 * while (set_next(yourset, &pos, &entry)) {
452 * Refer to borrowed reference in entry->key.
453 * }
454 *
455 * CAUTION: In general, it isn't safe to use set_next in a loop that
456 * mutates the table.
457 */
458 static int
set_next(PySetObject * so,Py_ssize_t * pos_ptr,setentry ** entry_ptr)459 set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr)
460 {
461 Py_ssize_t i;
462 Py_ssize_t mask;
463 setentry *entry;
464
465 assert (PyAnySet_Check(so));
466 i = *pos_ptr;
467 assert(i >= 0);
468 mask = so->mask;
469 entry = &so->table[i];
470 while (i <= mask && (entry->key == NULL || entry->key == dummy)) {
471 i++;
472 entry++;
473 }
474 *pos_ptr = i+1;
475 if (i > mask)
476 return 0;
477 assert(entry != NULL);
478 *entry_ptr = entry;
479 return 1;
480 }
481
482 static void
set_dealloc(PySetObject * so)483 set_dealloc(PySetObject *so)
484 {
485 setentry *entry;
486 Py_ssize_t used = so->used;
487
488 /* bpo-31095: UnTrack is needed before calling any callbacks */
489 PyObject_GC_UnTrack(so);
490 Py_TRASHCAN_BEGIN(so, set_dealloc)
491 if (so->weakreflist != NULL)
492 PyObject_ClearWeakRefs((PyObject *) so);
493
494 for (entry = so->table; used > 0; entry++) {
495 if (entry->key && entry->key != dummy) {
496 used--;
497 Py_DECREF(entry->key);
498 }
499 }
500 if (so->table != so->smalltable)
501 PyMem_Free(so->table);
502 Py_TYPE(so)->tp_free(so);
503 Py_TRASHCAN_END
504 }
505
506 static PyObject *
set_repr(PySetObject * so)507 set_repr(PySetObject *so)
508 {
509 PyObject *result=NULL, *keys, *listrepr, *tmp;
510 int status = Py_ReprEnter((PyObject*)so);
511
512 if (status != 0) {
513 if (status < 0)
514 return NULL;
515 return PyUnicode_FromFormat("%s(...)", Py_TYPE(so)->tp_name);
516 }
517
518 /* shortcut for the empty set */
519 if (!so->used) {
520 Py_ReprLeave((PyObject*)so);
521 return PyUnicode_FromFormat("%s()", Py_TYPE(so)->tp_name);
522 }
523
524 keys = PySequence_List((PyObject *)so);
525 if (keys == NULL)
526 goto done;
527
528 /* repr(keys)[1:-1] */
529 listrepr = PyObject_Repr(keys);
530 Py_DECREF(keys);
531 if (listrepr == NULL)
532 goto done;
533 tmp = PyUnicode_Substring(listrepr, 1, PyUnicode_GET_LENGTH(listrepr)-1);
534 Py_DECREF(listrepr);
535 if (tmp == NULL)
536 goto done;
537 listrepr = tmp;
538
539 if (!PySet_CheckExact(so))
540 result = PyUnicode_FromFormat("%s({%U})",
541 Py_TYPE(so)->tp_name,
542 listrepr);
543 else
544 result = PyUnicode_FromFormat("{%U}", listrepr);
545 Py_DECREF(listrepr);
546 done:
547 Py_ReprLeave((PyObject*)so);
548 return result;
549 }
550
551 static Py_ssize_t
set_len(PyObject * so)552 set_len(PyObject *so)
553 {
554 return ((PySetObject *)so)->used;
555 }
556
557 static int
set_merge(PySetObject * so,PyObject * otherset)558 set_merge(PySetObject *so, PyObject *otherset)
559 {
560 PySetObject *other;
561 PyObject *key;
562 Py_ssize_t i;
563 setentry *so_entry;
564 setentry *other_entry;
565
566 assert (PyAnySet_Check(so));
567 assert (PyAnySet_Check(otherset));
568
569 other = (PySetObject*)otherset;
570 if (other == so || other->used == 0)
571 /* a.update(a) or a.update(set()); nothing to do */
572 return 0;
573 /* Do one big resize at the start, rather than
574 * incrementally resizing as we insert new keys. Expect
575 * that there will be no (or few) overlapping keys.
576 */
577 if ((so->fill + other->used)*5 >= so->mask*3) {
578 if (set_table_resize(so, (so->used + other->used)*2) != 0)
579 return -1;
580 }
581 so_entry = so->table;
582 other_entry = other->table;
583
584 /* If our table is empty, and both tables have the same size, and
585 there are no dummies to eliminate, then just copy the pointers. */
586 if (so->fill == 0 && so->mask == other->mask && other->fill == other->used) {
587 for (i = 0; i <= other->mask; i++, so_entry++, other_entry++) {
588 key = other_entry->key;
589 if (key != NULL) {
590 assert(so_entry->key == NULL);
591 Py_INCREF(key);
592 so_entry->key = key;
593 so_entry->hash = other_entry->hash;
594 }
595 }
596 so->fill = other->fill;
597 so->used = other->used;
598 return 0;
599 }
600
601 /* If our table is empty, we can use set_insert_clean() */
602 if (so->fill == 0) {
603 setentry *newtable = so->table;
604 size_t newmask = (size_t)so->mask;
605 so->fill = other->used;
606 so->used = other->used;
607 for (i = other->mask + 1; i > 0 ; i--, other_entry++) {
608 key = other_entry->key;
609 if (key != NULL && key != dummy) {
610 Py_INCREF(key);
611 set_insert_clean(newtable, newmask, key, other_entry->hash);
612 }
613 }
614 return 0;
615 }
616
617 /* We can't assure there are no duplicates, so do normal insertions */
618 for (i = 0; i <= other->mask; i++) {
619 other_entry = &other->table[i];
620 key = other_entry->key;
621 if (key != NULL && key != dummy) {
622 if (set_add_entry(so, key, other_entry->hash))
623 return -1;
624 }
625 }
626 return 0;
627 }
628
629 static PyObject *
set_pop(PySetObject * so,PyObject * Py_UNUSED (ignored))630 set_pop(PySetObject *so, PyObject *Py_UNUSED(ignored))
631 {
632 /* Make sure the search finger is in bounds */
633 setentry *entry = so->table + (so->finger & so->mask);
634 setentry *limit = so->table + so->mask;
635 PyObject *key;
636
637 if (so->used == 0) {
638 PyErr_SetString(PyExc_KeyError, "pop from an empty set");
639 return NULL;
640 }
641 while (entry->key == NULL || entry->key==dummy) {
642 entry++;
643 if (entry > limit)
644 entry = so->table;
645 }
646 key = entry->key;
647 entry->key = dummy;
648 entry->hash = -1;
649 so->used--;
650 so->finger = entry - so->table + 1; /* next place to start */
651 return key;
652 }
653
654 PyDoc_STRVAR(pop_doc, "Remove and return an arbitrary set element.\n\
655 Raises KeyError if the set is empty.");
656
657 static int
set_traverse(PySetObject * so,visitproc visit,void * arg)658 set_traverse(PySetObject *so, visitproc visit, void *arg)
659 {
660 Py_ssize_t pos = 0;
661 setentry *entry;
662
663 while (set_next(so, &pos, &entry))
664 Py_VISIT(entry->key);
665 return 0;
666 }
667
668 /* Work to increase the bit dispersion for closely spaced hash values.
669 This is important because some use cases have many combinations of a
670 small number of elements with nearby hashes so that many distinct
671 combinations collapse to only a handful of distinct hash values. */
672
673 static Py_uhash_t
_shuffle_bits(Py_uhash_t h)674 _shuffle_bits(Py_uhash_t h)
675 {
676 return ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL;
677 }
678
679 /* Most of the constants in this hash algorithm are randomly chosen
680 large primes with "interesting bit patterns" and that passed tests
681 for good collision statistics on a variety of problematic datasets
682 including powersets and graph structures (such as David Eppstein's
683 graph recipes in Lib/test/test_set.py) */
684
685 static Py_hash_t
frozenset_hash(PyObject * self)686 frozenset_hash(PyObject *self)
687 {
688 PySetObject *so = (PySetObject *)self;
689 Py_uhash_t hash = 0;
690 setentry *entry;
691
692 if (so->hash != -1)
693 return so->hash;
694
695 /* Xor-in shuffled bits from every entry's hash field because xor is
696 commutative and a frozenset hash should be independent of order.
697
698 For speed, include null entries and dummy entries and then
699 subtract out their effect afterwards so that the final hash
700 depends only on active entries. This allows the code to be
701 vectorized by the compiler and it saves the unpredictable
702 branches that would arise when trying to exclude null and dummy
703 entries on every iteration. */
704
705 for (entry = so->table; entry <= &so->table[so->mask]; entry++)
706 hash ^= _shuffle_bits(entry->hash);
707
708 /* Remove the effect of an odd number of NULL entries */
709 if ((so->mask + 1 - so->fill) & 1)
710 hash ^= _shuffle_bits(0);
711
712 /* Remove the effect of an odd number of dummy entries */
713 if ((so->fill - so->used) & 1)
714 hash ^= _shuffle_bits(-1);
715
716 /* Factor in the number of active entries */
717 hash ^= ((Py_uhash_t)PySet_GET_SIZE(self) + 1) * 1927868237UL;
718
719 /* Disperse patterns arising in nested frozensets */
720 hash ^= (hash >> 11) ^ (hash >> 25);
721 hash = hash * 69069U + 907133923UL;
722
723 /* -1 is reserved as an error code */
724 if (hash == (Py_uhash_t)-1)
725 hash = 590923713UL;
726
727 so->hash = hash;
728 return hash;
729 }
730
731 /***** Set iterator type ***********************************************/
732
733 typedef struct {
734 PyObject_HEAD
735 PySetObject *si_set; /* Set to NULL when iterator is exhausted */
736 Py_ssize_t si_used;
737 Py_ssize_t si_pos;
738 Py_ssize_t len;
739 } setiterobject;
740
741 static void
setiter_dealloc(setiterobject * si)742 setiter_dealloc(setiterobject *si)
743 {
744 /* bpo-31095: UnTrack is needed before calling any callbacks */
745 _PyObject_GC_UNTRACK(si);
746 Py_XDECREF(si->si_set);
747 PyObject_GC_Del(si);
748 }
749
750 static int
setiter_traverse(setiterobject * si,visitproc visit,void * arg)751 setiter_traverse(setiterobject *si, visitproc visit, void *arg)
752 {
753 Py_VISIT(si->si_set);
754 return 0;
755 }
756
757 static PyObject *
setiter_len(setiterobject * si,PyObject * Py_UNUSED (ignored))758 setiter_len(setiterobject *si, PyObject *Py_UNUSED(ignored))
759 {
760 Py_ssize_t len = 0;
761 if (si->si_set != NULL && si->si_used == si->si_set->used)
762 len = si->len;
763 return PyLong_FromSsize_t(len);
764 }
765
766 PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
767
768 static PyObject *setiter_iternext(setiterobject *si);
769
770 static PyObject *
setiter_reduce(setiterobject * si,PyObject * Py_UNUSED (ignored))771 setiter_reduce(setiterobject *si, PyObject *Py_UNUSED(ignored))
772 {
773 /* copy the iterator state */
774 setiterobject tmp = *si;
775 Py_XINCREF(tmp.si_set);
776
777 /* iterate the temporary into a list */
778 PyObject *list = PySequence_List((PyObject*)&tmp);
779 Py_XDECREF(tmp.si_set);
780 if (list == NULL) {
781 return NULL;
782 }
783 return Py_BuildValue("N(N)", _PyEval_GetBuiltin(&_Py_ID(iter)), list);
784 }
785
786 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
787
788 static PyMethodDef setiter_methods[] = {
789 {"__length_hint__", (PyCFunction)setiter_len, METH_NOARGS, length_hint_doc},
790 {"__reduce__", (PyCFunction)setiter_reduce, METH_NOARGS, reduce_doc},
791 {NULL, NULL} /* sentinel */
792 };
793
setiter_iternext(setiterobject * si)794 static PyObject *setiter_iternext(setiterobject *si)
795 {
796 PyObject *key;
797 Py_ssize_t i, mask;
798 setentry *entry;
799 PySetObject *so = si->si_set;
800
801 if (so == NULL)
802 return NULL;
803 assert (PyAnySet_Check(so));
804
805 if (si->si_used != so->used) {
806 PyErr_SetString(PyExc_RuntimeError,
807 "Set changed size during iteration");
808 si->si_used = -1; /* Make this state sticky */
809 return NULL;
810 }
811
812 i = si->si_pos;
813 assert(i>=0);
814 entry = so->table;
815 mask = so->mask;
816 while (i <= mask && (entry[i].key == NULL || entry[i].key == dummy))
817 i++;
818 si->si_pos = i+1;
819 if (i > mask)
820 goto fail;
821 si->len--;
822 key = entry[i].key;
823 Py_INCREF(key);
824 return key;
825
826 fail:
827 si->si_set = NULL;
828 Py_DECREF(so);
829 return NULL;
830 }
831
832 PyTypeObject PySetIter_Type = {
833 PyVarObject_HEAD_INIT(&PyType_Type, 0)
834 "set_iterator", /* tp_name */
835 sizeof(setiterobject), /* tp_basicsize */
836 0, /* tp_itemsize */
837 /* methods */
838 (destructor)setiter_dealloc, /* tp_dealloc */
839 0, /* tp_vectorcall_offset */
840 0, /* tp_getattr */
841 0, /* tp_setattr */
842 0, /* tp_as_async */
843 0, /* tp_repr */
844 0, /* tp_as_number */
845 0, /* tp_as_sequence */
846 0, /* tp_as_mapping */
847 0, /* tp_hash */
848 0, /* tp_call */
849 0, /* tp_str */
850 PyObject_GenericGetAttr, /* tp_getattro */
851 0, /* tp_setattro */
852 0, /* tp_as_buffer */
853 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
854 0, /* tp_doc */
855 (traverseproc)setiter_traverse, /* tp_traverse */
856 0, /* tp_clear */
857 0, /* tp_richcompare */
858 0, /* tp_weaklistoffset */
859 PyObject_SelfIter, /* tp_iter */
860 (iternextfunc)setiter_iternext, /* tp_iternext */
861 setiter_methods, /* tp_methods */
862 0,
863 };
864
865 static PyObject *
set_iter(PySetObject * so)866 set_iter(PySetObject *so)
867 {
868 setiterobject *si = PyObject_GC_New(setiterobject, &PySetIter_Type);
869 if (si == NULL)
870 return NULL;
871 Py_INCREF(so);
872 si->si_set = so;
873 si->si_used = so->used;
874 si->si_pos = 0;
875 si->len = so->used;
876 _PyObject_GC_TRACK(si);
877 return (PyObject *)si;
878 }
879
880 static int
set_update_internal(PySetObject * so,PyObject * other)881 set_update_internal(PySetObject *so, PyObject *other)
882 {
883 PyObject *key, *it;
884
885 if (PyAnySet_Check(other))
886 return set_merge(so, other);
887
888 if (PyDict_CheckExact(other)) {
889 PyObject *value;
890 Py_ssize_t pos = 0;
891 Py_hash_t hash;
892 Py_ssize_t dictsize = PyDict_GET_SIZE(other);
893
894 /* Do one big resize at the start, rather than
895 * incrementally resizing as we insert new keys. Expect
896 * that there will be no (or few) overlapping keys.
897 */
898 if (dictsize < 0)
899 return -1;
900 if ((so->fill + dictsize)*5 >= so->mask*3) {
901 if (set_table_resize(so, (so->used + dictsize)*2) != 0)
902 return -1;
903 }
904 while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
905 if (set_add_entry(so, key, hash))
906 return -1;
907 }
908 return 0;
909 }
910
911 it = PyObject_GetIter(other);
912 if (it == NULL)
913 return -1;
914
915 while ((key = PyIter_Next(it)) != NULL) {
916 if (set_add_key(so, key)) {
917 Py_DECREF(it);
918 Py_DECREF(key);
919 return -1;
920 }
921 Py_DECREF(key);
922 }
923 Py_DECREF(it);
924 if (PyErr_Occurred())
925 return -1;
926 return 0;
927 }
928
929 static PyObject *
set_update(PySetObject * so,PyObject * args)930 set_update(PySetObject *so, PyObject *args)
931 {
932 Py_ssize_t i;
933
934 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
935 PyObject *other = PyTuple_GET_ITEM(args, i);
936 if (set_update_internal(so, other))
937 return NULL;
938 }
939 Py_RETURN_NONE;
940 }
941
942 PyDoc_STRVAR(update_doc,
943 "Update a set with the union of itself and others.");
944
945 /* XXX Todo:
946 If aligned memory allocations become available, make the
947 set object 64 byte aligned so that most of the fields
948 can be retrieved or updated in a single cache line.
949 */
950
951 static PyObject *
make_new_set(PyTypeObject * type,PyObject * iterable)952 make_new_set(PyTypeObject *type, PyObject *iterable)
953 {
954 assert(PyType_Check(type));
955 PySetObject *so;
956
957 so = (PySetObject *)type->tp_alloc(type, 0);
958 if (so == NULL)
959 return NULL;
960
961 so->fill = 0;
962 so->used = 0;
963 so->mask = PySet_MINSIZE - 1;
964 so->table = so->smalltable;
965 so->hash = -1;
966 so->finger = 0;
967 so->weakreflist = NULL;
968
969 if (iterable != NULL) {
970 if (set_update_internal(so, iterable)) {
971 Py_DECREF(so);
972 return NULL;
973 }
974 }
975
976 return (PyObject *)so;
977 }
978
979 static PyObject *
make_new_set_basetype(PyTypeObject * type,PyObject * iterable)980 make_new_set_basetype(PyTypeObject *type, PyObject *iterable)
981 {
982 if (type != &PySet_Type && type != &PyFrozenSet_Type) {
983 if (PyType_IsSubtype(type, &PySet_Type))
984 type = &PySet_Type;
985 else
986 type = &PyFrozenSet_Type;
987 }
988 return make_new_set(type, iterable);
989 }
990
991 static PyObject *
make_new_frozenset(PyTypeObject * type,PyObject * iterable)992 make_new_frozenset(PyTypeObject *type, PyObject *iterable)
993 {
994 if (type != &PyFrozenSet_Type) {
995 return make_new_set(type, iterable);
996 }
997
998 if (iterable != NULL && PyFrozenSet_CheckExact(iterable)) {
999 /* frozenset(f) is idempotent */
1000 Py_INCREF(iterable);
1001 return iterable;
1002 }
1003 return make_new_set(type, iterable);
1004 }
1005
1006 static PyObject *
frozenset_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1007 frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1008 {
1009 PyObject *iterable = NULL;
1010
1011 if ((type == &PyFrozenSet_Type ||
1012 type->tp_init == PyFrozenSet_Type.tp_init) &&
1013 !_PyArg_NoKeywords("frozenset", kwds)) {
1014 return NULL;
1015 }
1016
1017 if (!PyArg_UnpackTuple(args, type->tp_name, 0, 1, &iterable)) {
1018 return NULL;
1019 }
1020
1021 return make_new_frozenset(type, iterable);
1022 }
1023
1024 static PyObject *
frozenset_vectorcall(PyObject * type,PyObject * const * args,size_t nargsf,PyObject * kwnames)1025 frozenset_vectorcall(PyObject *type, PyObject * const*args,
1026 size_t nargsf, PyObject *kwnames)
1027 {
1028 if (!_PyArg_NoKwnames("frozenset", kwnames)) {
1029 return NULL;
1030 }
1031
1032 Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
1033 if (!_PyArg_CheckPositional("frozenset", nargs, 0, 1)) {
1034 return NULL;
1035 }
1036
1037 PyObject *iterable = (nargs ? args[0] : NULL);
1038 return make_new_frozenset(_PyType_CAST(type), iterable);
1039 }
1040
1041 static PyObject *
set_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1042 set_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1043 {
1044 return make_new_set(type, NULL);
1045 }
1046
1047 /* set_swap_bodies() switches the contents of any two sets by moving their
1048 internal data pointers and, if needed, copying the internal smalltables.
1049 Semantically equivalent to:
1050
1051 t=set(a); a.clear(); a.update(b); b.clear(); b.update(t); del t
1052
1053 The function always succeeds and it leaves both objects in a stable state.
1054 Useful for operations that update in-place (by allowing an intermediate
1055 result to be swapped into one of the original inputs).
1056 */
1057
1058 static void
set_swap_bodies(PySetObject * a,PySetObject * b)1059 set_swap_bodies(PySetObject *a, PySetObject *b)
1060 {
1061 Py_ssize_t t;
1062 setentry *u;
1063 setentry tab[PySet_MINSIZE];
1064 Py_hash_t h;
1065
1066 t = a->fill; a->fill = b->fill; b->fill = t;
1067 t = a->used; a->used = b->used; b->used = t;
1068 t = a->mask; a->mask = b->mask; b->mask = t;
1069
1070 u = a->table;
1071 if (a->table == a->smalltable)
1072 u = b->smalltable;
1073 a->table = b->table;
1074 if (b->table == b->smalltable)
1075 a->table = a->smalltable;
1076 b->table = u;
1077
1078 if (a->table == a->smalltable || b->table == b->smalltable) {
1079 memcpy(tab, a->smalltable, sizeof(tab));
1080 memcpy(a->smalltable, b->smalltable, sizeof(tab));
1081 memcpy(b->smalltable, tab, sizeof(tab));
1082 }
1083
1084 if (PyType_IsSubtype(Py_TYPE(a), &PyFrozenSet_Type) &&
1085 PyType_IsSubtype(Py_TYPE(b), &PyFrozenSet_Type)) {
1086 h = a->hash; a->hash = b->hash; b->hash = h;
1087 } else {
1088 a->hash = -1;
1089 b->hash = -1;
1090 }
1091 }
1092
1093 static PyObject *
set_copy(PySetObject * so,PyObject * Py_UNUSED (ignored))1094 set_copy(PySetObject *so, PyObject *Py_UNUSED(ignored))
1095 {
1096 return make_new_set_basetype(Py_TYPE(so), (PyObject *)so);
1097 }
1098
1099 static PyObject *
frozenset_copy(PySetObject * so,PyObject * Py_UNUSED (ignored))1100 frozenset_copy(PySetObject *so, PyObject *Py_UNUSED(ignored))
1101 {
1102 if (PyFrozenSet_CheckExact(so)) {
1103 Py_INCREF(so);
1104 return (PyObject *)so;
1105 }
1106 return set_copy(so, NULL);
1107 }
1108
1109 PyDoc_STRVAR(copy_doc, "Return a shallow copy of a set.");
1110
1111 static PyObject *
set_clear(PySetObject * so,PyObject * Py_UNUSED (ignored))1112 set_clear(PySetObject *so, PyObject *Py_UNUSED(ignored))
1113 {
1114 set_clear_internal(so);
1115 Py_RETURN_NONE;
1116 }
1117
1118 PyDoc_STRVAR(clear_doc, "Remove all elements from this set.");
1119
1120 static PyObject *
set_union(PySetObject * so,PyObject * args)1121 set_union(PySetObject *so, PyObject *args)
1122 {
1123 PySetObject *result;
1124 PyObject *other;
1125 Py_ssize_t i;
1126
1127 result = (PySetObject *)set_copy(so, NULL);
1128 if (result == NULL)
1129 return NULL;
1130
1131 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1132 other = PyTuple_GET_ITEM(args, i);
1133 if ((PyObject *)so == other)
1134 continue;
1135 if (set_update_internal(result, other)) {
1136 Py_DECREF(result);
1137 return NULL;
1138 }
1139 }
1140 return (PyObject *)result;
1141 }
1142
1143 PyDoc_STRVAR(union_doc,
1144 "Return the union of sets as a new set.\n\
1145 \n\
1146 (i.e. all elements that are in either set.)");
1147
1148 static PyObject *
set_or(PySetObject * so,PyObject * other)1149 set_or(PySetObject *so, PyObject *other)
1150 {
1151 PySetObject *result;
1152
1153 if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1154 Py_RETURN_NOTIMPLEMENTED;
1155
1156 result = (PySetObject *)set_copy(so, NULL);
1157 if (result == NULL)
1158 return NULL;
1159 if ((PyObject *)so == other)
1160 return (PyObject *)result;
1161 if (set_update_internal(result, other)) {
1162 Py_DECREF(result);
1163 return NULL;
1164 }
1165 return (PyObject *)result;
1166 }
1167
1168 static PyObject *
set_ior(PySetObject * so,PyObject * other)1169 set_ior(PySetObject *so, PyObject *other)
1170 {
1171 if (!PyAnySet_Check(other))
1172 Py_RETURN_NOTIMPLEMENTED;
1173
1174 if (set_update_internal(so, other))
1175 return NULL;
1176 Py_INCREF(so);
1177 return (PyObject *)so;
1178 }
1179
1180 static PyObject *
set_intersection(PySetObject * so,PyObject * other)1181 set_intersection(PySetObject *so, PyObject *other)
1182 {
1183 PySetObject *result;
1184 PyObject *key, *it, *tmp;
1185 Py_hash_t hash;
1186 int rv;
1187
1188 if ((PyObject *)so == other)
1189 return set_copy(so, NULL);
1190
1191 result = (PySetObject *)make_new_set_basetype(Py_TYPE(so), NULL);
1192 if (result == NULL)
1193 return NULL;
1194
1195 if (PyAnySet_Check(other)) {
1196 Py_ssize_t pos = 0;
1197 setentry *entry;
1198
1199 if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1200 tmp = (PyObject *)so;
1201 so = (PySetObject *)other;
1202 other = tmp;
1203 }
1204
1205 while (set_next((PySetObject *)other, &pos, &entry)) {
1206 key = entry->key;
1207 hash = entry->hash;
1208 Py_INCREF(key);
1209 rv = set_contains_entry(so, key, hash);
1210 if (rv < 0) {
1211 Py_DECREF(result);
1212 Py_DECREF(key);
1213 return NULL;
1214 }
1215 if (rv) {
1216 if (set_add_entry(result, key, hash)) {
1217 Py_DECREF(result);
1218 Py_DECREF(key);
1219 return NULL;
1220 }
1221 }
1222 Py_DECREF(key);
1223 }
1224 return (PyObject *)result;
1225 }
1226
1227 it = PyObject_GetIter(other);
1228 if (it == NULL) {
1229 Py_DECREF(result);
1230 return NULL;
1231 }
1232
1233 while ((key = PyIter_Next(it)) != NULL) {
1234 hash = PyObject_Hash(key);
1235 if (hash == -1)
1236 goto error;
1237 rv = set_contains_entry(so, key, hash);
1238 if (rv < 0)
1239 goto error;
1240 if (rv) {
1241 if (set_add_entry(result, key, hash))
1242 goto error;
1243 if (PySet_GET_SIZE(result) >= PySet_GET_SIZE(so)) {
1244 Py_DECREF(key);
1245 break;
1246 }
1247 }
1248 Py_DECREF(key);
1249 }
1250 Py_DECREF(it);
1251 if (PyErr_Occurred()) {
1252 Py_DECREF(result);
1253 return NULL;
1254 }
1255 return (PyObject *)result;
1256 error:
1257 Py_DECREF(it);
1258 Py_DECREF(result);
1259 Py_DECREF(key);
1260 return NULL;
1261 }
1262
1263 static PyObject *
set_intersection_multi(PySetObject * so,PyObject * args)1264 set_intersection_multi(PySetObject *so, PyObject *args)
1265 {
1266 Py_ssize_t i;
1267 PyObject *result = (PyObject *)so;
1268
1269 if (PyTuple_GET_SIZE(args) == 0)
1270 return set_copy(so, NULL);
1271
1272 Py_INCREF(so);
1273 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1274 PyObject *other = PyTuple_GET_ITEM(args, i);
1275 PyObject *newresult = set_intersection((PySetObject *)result, other);
1276 if (newresult == NULL) {
1277 Py_DECREF(result);
1278 return NULL;
1279 }
1280 Py_DECREF(result);
1281 result = newresult;
1282 }
1283 return result;
1284 }
1285
1286 PyDoc_STRVAR(intersection_doc,
1287 "Return the intersection of two sets as a new set.\n\
1288 \n\
1289 (i.e. all elements that are in both sets.)");
1290
1291 static PyObject *
set_intersection_update(PySetObject * so,PyObject * other)1292 set_intersection_update(PySetObject *so, PyObject *other)
1293 {
1294 PyObject *tmp;
1295
1296 tmp = set_intersection(so, other);
1297 if (tmp == NULL)
1298 return NULL;
1299 set_swap_bodies(so, (PySetObject *)tmp);
1300 Py_DECREF(tmp);
1301 Py_RETURN_NONE;
1302 }
1303
1304 static PyObject *
set_intersection_update_multi(PySetObject * so,PyObject * args)1305 set_intersection_update_multi(PySetObject *so, PyObject *args)
1306 {
1307 PyObject *tmp;
1308
1309 tmp = set_intersection_multi(so, args);
1310 if (tmp == NULL)
1311 return NULL;
1312 set_swap_bodies(so, (PySetObject *)tmp);
1313 Py_DECREF(tmp);
1314 Py_RETURN_NONE;
1315 }
1316
1317 PyDoc_STRVAR(intersection_update_doc,
1318 "Update a set with the intersection of itself and another.");
1319
1320 static PyObject *
set_and(PySetObject * so,PyObject * other)1321 set_and(PySetObject *so, PyObject *other)
1322 {
1323 if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1324 Py_RETURN_NOTIMPLEMENTED;
1325 return set_intersection(so, other);
1326 }
1327
1328 static PyObject *
set_iand(PySetObject * so,PyObject * other)1329 set_iand(PySetObject *so, PyObject *other)
1330 {
1331 PyObject *result;
1332
1333 if (!PyAnySet_Check(other))
1334 Py_RETURN_NOTIMPLEMENTED;
1335 result = set_intersection_update(so, other);
1336 if (result == NULL)
1337 return NULL;
1338 Py_DECREF(result);
1339 Py_INCREF(so);
1340 return (PyObject *)so;
1341 }
1342
1343 static PyObject *
set_isdisjoint(PySetObject * so,PyObject * other)1344 set_isdisjoint(PySetObject *so, PyObject *other)
1345 {
1346 PyObject *key, *it, *tmp;
1347 int rv;
1348
1349 if ((PyObject *)so == other) {
1350 if (PySet_GET_SIZE(so) == 0)
1351 Py_RETURN_TRUE;
1352 else
1353 Py_RETURN_FALSE;
1354 }
1355
1356 if (PyAnySet_CheckExact(other)) {
1357 Py_ssize_t pos = 0;
1358 setentry *entry;
1359
1360 if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1361 tmp = (PyObject *)so;
1362 so = (PySetObject *)other;
1363 other = tmp;
1364 }
1365 while (set_next((PySetObject *)other, &pos, &entry)) {
1366 PyObject *key = entry->key;
1367 Py_INCREF(key);
1368 rv = set_contains_entry(so, key, entry->hash);
1369 Py_DECREF(key);
1370 if (rv < 0) {
1371 return NULL;
1372 }
1373 if (rv) {
1374 Py_RETURN_FALSE;
1375 }
1376 }
1377 Py_RETURN_TRUE;
1378 }
1379
1380 it = PyObject_GetIter(other);
1381 if (it == NULL)
1382 return NULL;
1383
1384 while ((key = PyIter_Next(it)) != NULL) {
1385 rv = set_contains_key(so, key);
1386 Py_DECREF(key);
1387 if (rv < 0) {
1388 Py_DECREF(it);
1389 return NULL;
1390 }
1391 if (rv) {
1392 Py_DECREF(it);
1393 Py_RETURN_FALSE;
1394 }
1395 }
1396 Py_DECREF(it);
1397 if (PyErr_Occurred())
1398 return NULL;
1399 Py_RETURN_TRUE;
1400 }
1401
1402 PyDoc_STRVAR(isdisjoint_doc,
1403 "Return True if two sets have a null intersection.");
1404
1405 static int
set_difference_update_internal(PySetObject * so,PyObject * other)1406 set_difference_update_internal(PySetObject *so, PyObject *other)
1407 {
1408 if ((PyObject *)so == other)
1409 return set_clear_internal(so);
1410
1411 if (PyAnySet_Check(other)) {
1412 setentry *entry;
1413 Py_ssize_t pos = 0;
1414
1415 /* Optimization: When the other set is more than 8 times
1416 larger than the base set, replace the other set with
1417 intersection of the two sets.
1418 */
1419 if ((PySet_GET_SIZE(other) >> 3) > PySet_GET_SIZE(so)) {
1420 other = set_intersection(so, other);
1421 if (other == NULL)
1422 return -1;
1423 } else {
1424 Py_INCREF(other);
1425 }
1426
1427 while (set_next((PySetObject *)other, &pos, &entry)) {
1428 PyObject *key = entry->key;
1429 Py_INCREF(key);
1430 if (set_discard_entry(so, key, entry->hash) < 0) {
1431 Py_DECREF(other);
1432 Py_DECREF(key);
1433 return -1;
1434 }
1435 Py_DECREF(key);
1436 }
1437
1438 Py_DECREF(other);
1439 } else {
1440 PyObject *key, *it;
1441 it = PyObject_GetIter(other);
1442 if (it == NULL)
1443 return -1;
1444
1445 while ((key = PyIter_Next(it)) != NULL) {
1446 if (set_discard_key(so, key) < 0) {
1447 Py_DECREF(it);
1448 Py_DECREF(key);
1449 return -1;
1450 }
1451 Py_DECREF(key);
1452 }
1453 Py_DECREF(it);
1454 if (PyErr_Occurred())
1455 return -1;
1456 }
1457 /* If more than 1/4th are dummies, then resize them away. */
1458 if ((size_t)(so->fill - so->used) <= (size_t)so->mask / 4)
1459 return 0;
1460 return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
1461 }
1462
1463 static PyObject *
set_difference_update(PySetObject * so,PyObject * args)1464 set_difference_update(PySetObject *so, PyObject *args)
1465 {
1466 Py_ssize_t i;
1467
1468 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1469 PyObject *other = PyTuple_GET_ITEM(args, i);
1470 if (set_difference_update_internal(so, other))
1471 return NULL;
1472 }
1473 Py_RETURN_NONE;
1474 }
1475
1476 PyDoc_STRVAR(difference_update_doc,
1477 "Remove all elements of another set from this set.");
1478
1479 static PyObject *
set_copy_and_difference(PySetObject * so,PyObject * other)1480 set_copy_and_difference(PySetObject *so, PyObject *other)
1481 {
1482 PyObject *result;
1483
1484 result = set_copy(so, NULL);
1485 if (result == NULL)
1486 return NULL;
1487 if (set_difference_update_internal((PySetObject *) result, other) == 0)
1488 return result;
1489 Py_DECREF(result);
1490 return NULL;
1491 }
1492
1493 static PyObject *
set_difference(PySetObject * so,PyObject * other)1494 set_difference(PySetObject *so, PyObject *other)
1495 {
1496 PyObject *result;
1497 PyObject *key;
1498 Py_hash_t hash;
1499 setentry *entry;
1500 Py_ssize_t pos = 0, other_size;
1501 int rv;
1502
1503 if (PyAnySet_Check(other)) {
1504 other_size = PySet_GET_SIZE(other);
1505 }
1506 else if (PyDict_CheckExact(other)) {
1507 other_size = PyDict_GET_SIZE(other);
1508 }
1509 else {
1510 return set_copy_and_difference(so, other);
1511 }
1512
1513 /* If len(so) much more than len(other), it's more efficient to simply copy
1514 * so and then iterate other looking for common elements. */
1515 if ((PySet_GET_SIZE(so) >> 2) > other_size) {
1516 return set_copy_and_difference(so, other);
1517 }
1518
1519 result = make_new_set_basetype(Py_TYPE(so), NULL);
1520 if (result == NULL)
1521 return NULL;
1522
1523 if (PyDict_CheckExact(other)) {
1524 while (set_next(so, &pos, &entry)) {
1525 key = entry->key;
1526 hash = entry->hash;
1527 Py_INCREF(key);
1528 rv = _PyDict_Contains_KnownHash(other, key, hash);
1529 if (rv < 0) {
1530 Py_DECREF(result);
1531 Py_DECREF(key);
1532 return NULL;
1533 }
1534 if (!rv) {
1535 if (set_add_entry((PySetObject *)result, key, hash)) {
1536 Py_DECREF(result);
1537 Py_DECREF(key);
1538 return NULL;
1539 }
1540 }
1541 Py_DECREF(key);
1542 }
1543 return result;
1544 }
1545
1546 /* Iterate over so, checking for common elements in other. */
1547 while (set_next(so, &pos, &entry)) {
1548 key = entry->key;
1549 hash = entry->hash;
1550 Py_INCREF(key);
1551 rv = set_contains_entry((PySetObject *)other, key, hash);
1552 if (rv < 0) {
1553 Py_DECREF(result);
1554 Py_DECREF(key);
1555 return NULL;
1556 }
1557 if (!rv) {
1558 if (set_add_entry((PySetObject *)result, key, hash)) {
1559 Py_DECREF(result);
1560 Py_DECREF(key);
1561 return NULL;
1562 }
1563 }
1564 Py_DECREF(key);
1565 }
1566 return result;
1567 }
1568
1569 static PyObject *
set_difference_multi(PySetObject * so,PyObject * args)1570 set_difference_multi(PySetObject *so, PyObject *args)
1571 {
1572 Py_ssize_t i;
1573 PyObject *result, *other;
1574
1575 if (PyTuple_GET_SIZE(args) == 0)
1576 return set_copy(so, NULL);
1577
1578 other = PyTuple_GET_ITEM(args, 0);
1579 result = set_difference(so, other);
1580 if (result == NULL)
1581 return NULL;
1582
1583 for (i=1 ; i<PyTuple_GET_SIZE(args) ; i++) {
1584 other = PyTuple_GET_ITEM(args, i);
1585 if (set_difference_update_internal((PySetObject *)result, other)) {
1586 Py_DECREF(result);
1587 return NULL;
1588 }
1589 }
1590 return result;
1591 }
1592
1593 PyDoc_STRVAR(difference_doc,
1594 "Return the difference of two or more sets as a new set.\n\
1595 \n\
1596 (i.e. all elements that are in this set but not the others.)");
1597 static PyObject *
set_sub(PySetObject * so,PyObject * other)1598 set_sub(PySetObject *so, PyObject *other)
1599 {
1600 if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1601 Py_RETURN_NOTIMPLEMENTED;
1602 return set_difference(so, other);
1603 }
1604
1605 static PyObject *
set_isub(PySetObject * so,PyObject * other)1606 set_isub(PySetObject *so, PyObject *other)
1607 {
1608 if (!PyAnySet_Check(other))
1609 Py_RETURN_NOTIMPLEMENTED;
1610 if (set_difference_update_internal(so, other))
1611 return NULL;
1612 Py_INCREF(so);
1613 return (PyObject *)so;
1614 }
1615
1616 static PyObject *
set_symmetric_difference_update(PySetObject * so,PyObject * other)1617 set_symmetric_difference_update(PySetObject *so, PyObject *other)
1618 {
1619 PySetObject *otherset;
1620 PyObject *key;
1621 Py_ssize_t pos = 0;
1622 Py_hash_t hash;
1623 setentry *entry;
1624 int rv;
1625
1626 if ((PyObject *)so == other)
1627 return set_clear(so, NULL);
1628
1629 if (PyDict_CheckExact(other)) {
1630 PyObject *value;
1631 while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
1632 Py_INCREF(key);
1633 rv = set_discard_entry(so, key, hash);
1634 if (rv < 0) {
1635 Py_DECREF(key);
1636 return NULL;
1637 }
1638 if (rv == DISCARD_NOTFOUND) {
1639 if (set_add_entry(so, key, hash)) {
1640 Py_DECREF(key);
1641 return NULL;
1642 }
1643 }
1644 Py_DECREF(key);
1645 }
1646 Py_RETURN_NONE;
1647 }
1648
1649 if (PyAnySet_Check(other)) {
1650 Py_INCREF(other);
1651 otherset = (PySetObject *)other;
1652 } else {
1653 otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other);
1654 if (otherset == NULL)
1655 return NULL;
1656 }
1657
1658 while (set_next(otherset, &pos, &entry)) {
1659 key = entry->key;
1660 hash = entry->hash;
1661 Py_INCREF(key);
1662 rv = set_discard_entry(so, key, hash);
1663 if (rv < 0) {
1664 Py_DECREF(otherset);
1665 Py_DECREF(key);
1666 return NULL;
1667 }
1668 if (rv == DISCARD_NOTFOUND) {
1669 if (set_add_entry(so, key, hash)) {
1670 Py_DECREF(otherset);
1671 Py_DECREF(key);
1672 return NULL;
1673 }
1674 }
1675 Py_DECREF(key);
1676 }
1677 Py_DECREF(otherset);
1678 Py_RETURN_NONE;
1679 }
1680
1681 PyDoc_STRVAR(symmetric_difference_update_doc,
1682 "Update a set with the symmetric difference of itself and another.");
1683
1684 static PyObject *
set_symmetric_difference(PySetObject * so,PyObject * other)1685 set_symmetric_difference(PySetObject *so, PyObject *other)
1686 {
1687 PyObject *rv;
1688 PySetObject *otherset;
1689
1690 otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other);
1691 if (otherset == NULL)
1692 return NULL;
1693 rv = set_symmetric_difference_update(otherset, (PyObject *)so);
1694 if (rv == NULL) {
1695 Py_DECREF(otherset);
1696 return NULL;
1697 }
1698 Py_DECREF(rv);
1699 return (PyObject *)otherset;
1700 }
1701
1702 PyDoc_STRVAR(symmetric_difference_doc,
1703 "Return the symmetric difference of two sets as a new set.\n\
1704 \n\
1705 (i.e. all elements that are in exactly one of the sets.)");
1706
1707 static PyObject *
set_xor(PySetObject * so,PyObject * other)1708 set_xor(PySetObject *so, PyObject *other)
1709 {
1710 if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1711 Py_RETURN_NOTIMPLEMENTED;
1712 return set_symmetric_difference(so, other);
1713 }
1714
1715 static PyObject *
set_ixor(PySetObject * so,PyObject * other)1716 set_ixor(PySetObject *so, PyObject *other)
1717 {
1718 PyObject *result;
1719
1720 if (!PyAnySet_Check(other))
1721 Py_RETURN_NOTIMPLEMENTED;
1722 result = set_symmetric_difference_update(so, other);
1723 if (result == NULL)
1724 return NULL;
1725 Py_DECREF(result);
1726 Py_INCREF(so);
1727 return (PyObject *)so;
1728 }
1729
1730 static PyObject *
set_issubset(PySetObject * so,PyObject * other)1731 set_issubset(PySetObject *so, PyObject *other)
1732 {
1733 setentry *entry;
1734 Py_ssize_t pos = 0;
1735 int rv;
1736
1737 if (!PyAnySet_Check(other)) {
1738 PyObject *tmp, *result;
1739 tmp = make_new_set(&PySet_Type, other);
1740 if (tmp == NULL)
1741 return NULL;
1742 result = set_issubset(so, tmp);
1743 Py_DECREF(tmp);
1744 return result;
1745 }
1746 if (PySet_GET_SIZE(so) > PySet_GET_SIZE(other))
1747 Py_RETURN_FALSE;
1748
1749 while (set_next(so, &pos, &entry)) {
1750 PyObject *key = entry->key;
1751 Py_INCREF(key);
1752 rv = set_contains_entry((PySetObject *)other, key, entry->hash);
1753 Py_DECREF(key);
1754 if (rv < 0) {
1755 return NULL;
1756 }
1757 if (!rv) {
1758 Py_RETURN_FALSE;
1759 }
1760 }
1761 Py_RETURN_TRUE;
1762 }
1763
1764 PyDoc_STRVAR(issubset_doc, "Report whether another set contains this set.");
1765
1766 static PyObject *
set_issuperset(PySetObject * so,PyObject * other)1767 set_issuperset(PySetObject *so, PyObject *other)
1768 {
1769 if (PyAnySet_Check(other)) {
1770 return set_issubset((PySetObject *)other, (PyObject *)so);
1771 }
1772
1773 PyObject *key, *it = PyObject_GetIter(other);
1774 if (it == NULL) {
1775 return NULL;
1776 }
1777 while ((key = PyIter_Next(it)) != NULL) {
1778 int rv = set_contains_key(so, key);
1779 Py_DECREF(key);
1780 if (rv < 0) {
1781 Py_DECREF(it);
1782 return NULL;
1783 }
1784 if (!rv) {
1785 Py_DECREF(it);
1786 Py_RETURN_FALSE;
1787 }
1788 }
1789 Py_DECREF(it);
1790 if (PyErr_Occurred()) {
1791 return NULL;
1792 }
1793 Py_RETURN_TRUE;
1794 }
1795
1796 PyDoc_STRVAR(issuperset_doc, "Report whether this set contains another set.");
1797
1798 static PyObject *
set_richcompare(PySetObject * v,PyObject * w,int op)1799 set_richcompare(PySetObject *v, PyObject *w, int op)
1800 {
1801 PyObject *r1;
1802 int r2;
1803
1804 if(!PyAnySet_Check(w))
1805 Py_RETURN_NOTIMPLEMENTED;
1806
1807 switch (op) {
1808 case Py_EQ:
1809 if (PySet_GET_SIZE(v) != PySet_GET_SIZE(w))
1810 Py_RETURN_FALSE;
1811 if (v->hash != -1 &&
1812 ((PySetObject *)w)->hash != -1 &&
1813 v->hash != ((PySetObject *)w)->hash)
1814 Py_RETURN_FALSE;
1815 return set_issubset(v, w);
1816 case Py_NE:
1817 r1 = set_richcompare(v, w, Py_EQ);
1818 if (r1 == NULL)
1819 return NULL;
1820 r2 = PyObject_IsTrue(r1);
1821 Py_DECREF(r1);
1822 if (r2 < 0)
1823 return NULL;
1824 return PyBool_FromLong(!r2);
1825 case Py_LE:
1826 return set_issubset(v, w);
1827 case Py_GE:
1828 return set_issuperset(v, w);
1829 case Py_LT:
1830 if (PySet_GET_SIZE(v) >= PySet_GET_SIZE(w))
1831 Py_RETURN_FALSE;
1832 return set_issubset(v, w);
1833 case Py_GT:
1834 if (PySet_GET_SIZE(v) <= PySet_GET_SIZE(w))
1835 Py_RETURN_FALSE;
1836 return set_issuperset(v, w);
1837 }
1838 Py_RETURN_NOTIMPLEMENTED;
1839 }
1840
1841 static PyObject *
set_add(PySetObject * so,PyObject * key)1842 set_add(PySetObject *so, PyObject *key)
1843 {
1844 if (set_add_key(so, key))
1845 return NULL;
1846 Py_RETURN_NONE;
1847 }
1848
1849 PyDoc_STRVAR(add_doc,
1850 "Add an element to a set.\n\
1851 \n\
1852 This has no effect if the element is already present.");
1853
1854 static int
set_contains(PySetObject * so,PyObject * key)1855 set_contains(PySetObject *so, PyObject *key)
1856 {
1857 PyObject *tmpkey;
1858 int rv;
1859
1860 rv = set_contains_key(so, key);
1861 if (rv < 0) {
1862 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1863 return -1;
1864 PyErr_Clear();
1865 tmpkey = make_new_set(&PyFrozenSet_Type, key);
1866 if (tmpkey == NULL)
1867 return -1;
1868 rv = set_contains_key(so, tmpkey);
1869 Py_DECREF(tmpkey);
1870 }
1871 return rv;
1872 }
1873
1874 static PyObject *
set_direct_contains(PySetObject * so,PyObject * key)1875 set_direct_contains(PySetObject *so, PyObject *key)
1876 {
1877 long result;
1878
1879 result = set_contains(so, key);
1880 if (result < 0)
1881 return NULL;
1882 return PyBool_FromLong(result);
1883 }
1884
1885 PyDoc_STRVAR(contains_doc, "x.__contains__(y) <==> y in x.");
1886
1887 static PyObject *
set_remove(PySetObject * so,PyObject * key)1888 set_remove(PySetObject *so, PyObject *key)
1889 {
1890 PyObject *tmpkey;
1891 int rv;
1892
1893 rv = set_discard_key(so, key);
1894 if (rv < 0) {
1895 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1896 return NULL;
1897 PyErr_Clear();
1898 tmpkey = make_new_set(&PyFrozenSet_Type, key);
1899 if (tmpkey == NULL)
1900 return NULL;
1901 rv = set_discard_key(so, tmpkey);
1902 Py_DECREF(tmpkey);
1903 if (rv < 0)
1904 return NULL;
1905 }
1906
1907 if (rv == DISCARD_NOTFOUND) {
1908 _PyErr_SetKeyError(key);
1909 return NULL;
1910 }
1911 Py_RETURN_NONE;
1912 }
1913
1914 PyDoc_STRVAR(remove_doc,
1915 "Remove an element from a set; it must be a member.\n\
1916 \n\
1917 If the element is not a member, raise a KeyError.");
1918
1919 static PyObject *
set_discard(PySetObject * so,PyObject * key)1920 set_discard(PySetObject *so, PyObject *key)
1921 {
1922 PyObject *tmpkey;
1923 int rv;
1924
1925 rv = set_discard_key(so, key);
1926 if (rv < 0) {
1927 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1928 return NULL;
1929 PyErr_Clear();
1930 tmpkey = make_new_set(&PyFrozenSet_Type, key);
1931 if (tmpkey == NULL)
1932 return NULL;
1933 rv = set_discard_key(so, tmpkey);
1934 Py_DECREF(tmpkey);
1935 if (rv < 0)
1936 return NULL;
1937 }
1938 Py_RETURN_NONE;
1939 }
1940
1941 PyDoc_STRVAR(discard_doc,
1942 "Remove an element from a set if it is a member.\n\
1943 \n\
1944 Unlike set.remove(), the discard() method does not raise\n\
1945 an exception when an element is missing from the set.");
1946
1947 static PyObject *
set_reduce(PySetObject * so,PyObject * Py_UNUSED (ignored))1948 set_reduce(PySetObject *so, PyObject *Py_UNUSED(ignored))
1949 {
1950 PyObject *keys=NULL, *args=NULL, *result=NULL, *state=NULL;
1951
1952 keys = PySequence_List((PyObject *)so);
1953 if (keys == NULL)
1954 goto done;
1955 args = PyTuple_Pack(1, keys);
1956 if (args == NULL)
1957 goto done;
1958 state = _PyObject_GetState((PyObject *)so);
1959 if (state == NULL)
1960 goto done;
1961 result = PyTuple_Pack(3, Py_TYPE(so), args, state);
1962 done:
1963 Py_XDECREF(args);
1964 Py_XDECREF(keys);
1965 Py_XDECREF(state);
1966 return result;
1967 }
1968
1969 static PyObject *
set_sizeof(PySetObject * so,PyObject * Py_UNUSED (ignored))1970 set_sizeof(PySetObject *so, PyObject *Py_UNUSED(ignored))
1971 {
1972 Py_ssize_t res;
1973
1974 res = _PyObject_SIZE(Py_TYPE(so));
1975 if (so->table != so->smalltable)
1976 res = res + (so->mask + 1) * sizeof(setentry);
1977 return PyLong_FromSsize_t(res);
1978 }
1979
1980 PyDoc_STRVAR(sizeof_doc, "S.__sizeof__() -> size of S in memory, in bytes");
1981 static int
set_init(PySetObject * self,PyObject * args,PyObject * kwds)1982 set_init(PySetObject *self, PyObject *args, PyObject *kwds)
1983 {
1984 PyObject *iterable = NULL;
1985
1986 if (!_PyArg_NoKeywords("set", kwds))
1987 return -1;
1988 if (!PyArg_UnpackTuple(args, Py_TYPE(self)->tp_name, 0, 1, &iterable))
1989 return -1;
1990 if (self->fill)
1991 set_clear_internal(self);
1992 self->hash = -1;
1993 if (iterable == NULL)
1994 return 0;
1995 return set_update_internal(self, iterable);
1996 }
1997
1998 static PyObject*
set_vectorcall(PyObject * type,PyObject * const * args,size_t nargsf,PyObject * kwnames)1999 set_vectorcall(PyObject *type, PyObject * const*args,
2000 size_t nargsf, PyObject *kwnames)
2001 {
2002 assert(PyType_Check(type));
2003
2004 if (!_PyArg_NoKwnames("set", kwnames)) {
2005 return NULL;
2006 }
2007
2008 Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
2009 if (!_PyArg_CheckPositional("set", nargs, 0, 1)) {
2010 return NULL;
2011 }
2012
2013 if (nargs) {
2014 return make_new_set(_PyType_CAST(type), args[0]);
2015 }
2016
2017 return make_new_set(_PyType_CAST(type), NULL);
2018 }
2019
2020 static PySequenceMethods set_as_sequence = {
2021 set_len, /* sq_length */
2022 0, /* sq_concat */
2023 0, /* sq_repeat */
2024 0, /* sq_item */
2025 0, /* sq_slice */
2026 0, /* sq_ass_item */
2027 0, /* sq_ass_slice */
2028 (objobjproc)set_contains, /* sq_contains */
2029 };
2030
2031 /* set object ********************************************************/
2032
2033 #ifdef Py_DEBUG
2034 static PyObject *test_c_api(PySetObject *so, PyObject *Py_UNUSED(ignored));
2035
2036 PyDoc_STRVAR(test_c_api_doc, "Exercises C API. Returns True.\n\
2037 All is well if assertions don't fail.");
2038 #endif
2039
2040 static PyMethodDef set_methods[] = {
2041 {"add", (PyCFunction)set_add, METH_O,
2042 add_doc},
2043 {"clear", (PyCFunction)set_clear, METH_NOARGS,
2044 clear_doc},
2045 {"__contains__",(PyCFunction)set_direct_contains, METH_O | METH_COEXIST,
2046 contains_doc},
2047 {"copy", (PyCFunction)set_copy, METH_NOARGS,
2048 copy_doc},
2049 {"discard", (PyCFunction)set_discard, METH_O,
2050 discard_doc},
2051 {"difference", (PyCFunction)set_difference_multi, METH_VARARGS,
2052 difference_doc},
2053 {"difference_update", (PyCFunction)set_difference_update, METH_VARARGS,
2054 difference_update_doc},
2055 {"intersection",(PyCFunction)set_intersection_multi, METH_VARARGS,
2056 intersection_doc},
2057 {"intersection_update",(PyCFunction)set_intersection_update_multi, METH_VARARGS,
2058 intersection_update_doc},
2059 {"isdisjoint", (PyCFunction)set_isdisjoint, METH_O,
2060 isdisjoint_doc},
2061 {"issubset", (PyCFunction)set_issubset, METH_O,
2062 issubset_doc},
2063 {"issuperset", (PyCFunction)set_issuperset, METH_O,
2064 issuperset_doc},
2065 {"pop", (PyCFunction)set_pop, METH_NOARGS,
2066 pop_doc},
2067 {"__reduce__", (PyCFunction)set_reduce, METH_NOARGS,
2068 reduce_doc},
2069 {"remove", (PyCFunction)set_remove, METH_O,
2070 remove_doc},
2071 {"__sizeof__", (PyCFunction)set_sizeof, METH_NOARGS,
2072 sizeof_doc},
2073 {"symmetric_difference",(PyCFunction)set_symmetric_difference, METH_O,
2074 symmetric_difference_doc},
2075 {"symmetric_difference_update",(PyCFunction)set_symmetric_difference_update, METH_O,
2076 symmetric_difference_update_doc},
2077 #ifdef Py_DEBUG
2078 {"test_c_api", (PyCFunction)test_c_api, METH_NOARGS,
2079 test_c_api_doc},
2080 #endif
2081 {"union", (PyCFunction)set_union, METH_VARARGS,
2082 union_doc},
2083 {"update", (PyCFunction)set_update, METH_VARARGS,
2084 update_doc},
2085 {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")},
2086 {NULL, NULL} /* sentinel */
2087 };
2088
2089 static PyNumberMethods set_as_number = {
2090 0, /*nb_add*/
2091 (binaryfunc)set_sub, /*nb_subtract*/
2092 0, /*nb_multiply*/
2093 0, /*nb_remainder*/
2094 0, /*nb_divmod*/
2095 0, /*nb_power*/
2096 0, /*nb_negative*/
2097 0, /*nb_positive*/
2098 0, /*nb_absolute*/
2099 0, /*nb_bool*/
2100 0, /*nb_invert*/
2101 0, /*nb_lshift*/
2102 0, /*nb_rshift*/
2103 (binaryfunc)set_and, /*nb_and*/
2104 (binaryfunc)set_xor, /*nb_xor*/
2105 (binaryfunc)set_or, /*nb_or*/
2106 0, /*nb_int*/
2107 0, /*nb_reserved*/
2108 0, /*nb_float*/
2109 0, /*nb_inplace_add*/
2110 (binaryfunc)set_isub, /*nb_inplace_subtract*/
2111 0, /*nb_inplace_multiply*/
2112 0, /*nb_inplace_remainder*/
2113 0, /*nb_inplace_power*/
2114 0, /*nb_inplace_lshift*/
2115 0, /*nb_inplace_rshift*/
2116 (binaryfunc)set_iand, /*nb_inplace_and*/
2117 (binaryfunc)set_ixor, /*nb_inplace_xor*/
2118 (binaryfunc)set_ior, /*nb_inplace_or*/
2119 };
2120
2121 PyDoc_STRVAR(set_doc,
2122 "set() -> new empty set object\n\
2123 set(iterable) -> new set object\n\
2124 \n\
2125 Build an unordered collection of unique elements.");
2126
2127 PyTypeObject PySet_Type = {
2128 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2129 "set", /* tp_name */
2130 sizeof(PySetObject), /* tp_basicsize */
2131 0, /* tp_itemsize */
2132 /* methods */
2133 (destructor)set_dealloc, /* tp_dealloc */
2134 0, /* tp_vectorcall_offset */
2135 0, /* tp_getattr */
2136 0, /* tp_setattr */
2137 0, /* tp_as_async */
2138 (reprfunc)set_repr, /* tp_repr */
2139 &set_as_number, /* tp_as_number */
2140 &set_as_sequence, /* tp_as_sequence */
2141 0, /* tp_as_mapping */
2142 PyObject_HashNotImplemented, /* tp_hash */
2143 0, /* tp_call */
2144 0, /* tp_str */
2145 PyObject_GenericGetAttr, /* tp_getattro */
2146 0, /* tp_setattro */
2147 0, /* tp_as_buffer */
2148 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2149 Py_TPFLAGS_BASETYPE |
2150 _Py_TPFLAGS_MATCH_SELF, /* tp_flags */
2151 set_doc, /* tp_doc */
2152 (traverseproc)set_traverse, /* tp_traverse */
2153 (inquiry)set_clear_internal, /* tp_clear */
2154 (richcmpfunc)set_richcompare, /* tp_richcompare */
2155 offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2156 (getiterfunc)set_iter, /* tp_iter */
2157 0, /* tp_iternext */
2158 set_methods, /* tp_methods */
2159 0, /* tp_members */
2160 0, /* tp_getset */
2161 0, /* tp_base */
2162 0, /* tp_dict */
2163 0, /* tp_descr_get */
2164 0, /* tp_descr_set */
2165 0, /* tp_dictoffset */
2166 (initproc)set_init, /* tp_init */
2167 PyType_GenericAlloc, /* tp_alloc */
2168 set_new, /* tp_new */
2169 PyObject_GC_Del, /* tp_free */
2170 .tp_vectorcall = set_vectorcall,
2171 };
2172
2173 /* frozenset object ********************************************************/
2174
2175
2176 static PyMethodDef frozenset_methods[] = {
2177 {"__contains__",(PyCFunction)set_direct_contains, METH_O | METH_COEXIST,
2178 contains_doc},
2179 {"copy", (PyCFunction)frozenset_copy, METH_NOARGS,
2180 copy_doc},
2181 {"difference", (PyCFunction)set_difference_multi, METH_VARARGS,
2182 difference_doc},
2183 {"intersection", (PyCFunction)set_intersection_multi, METH_VARARGS,
2184 intersection_doc},
2185 {"isdisjoint", (PyCFunction)set_isdisjoint, METH_O,
2186 isdisjoint_doc},
2187 {"issubset", (PyCFunction)set_issubset, METH_O,
2188 issubset_doc},
2189 {"issuperset", (PyCFunction)set_issuperset, METH_O,
2190 issuperset_doc},
2191 {"__reduce__", (PyCFunction)set_reduce, METH_NOARGS,
2192 reduce_doc},
2193 {"__sizeof__", (PyCFunction)set_sizeof, METH_NOARGS,
2194 sizeof_doc},
2195 {"symmetric_difference",(PyCFunction)set_symmetric_difference, METH_O,
2196 symmetric_difference_doc},
2197 {"union", (PyCFunction)set_union, METH_VARARGS,
2198 union_doc},
2199 {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")},
2200 {NULL, NULL} /* sentinel */
2201 };
2202
2203 static PyNumberMethods frozenset_as_number = {
2204 0, /*nb_add*/
2205 (binaryfunc)set_sub, /*nb_subtract*/
2206 0, /*nb_multiply*/
2207 0, /*nb_remainder*/
2208 0, /*nb_divmod*/
2209 0, /*nb_power*/
2210 0, /*nb_negative*/
2211 0, /*nb_positive*/
2212 0, /*nb_absolute*/
2213 0, /*nb_bool*/
2214 0, /*nb_invert*/
2215 0, /*nb_lshift*/
2216 0, /*nb_rshift*/
2217 (binaryfunc)set_and, /*nb_and*/
2218 (binaryfunc)set_xor, /*nb_xor*/
2219 (binaryfunc)set_or, /*nb_or*/
2220 };
2221
2222 PyDoc_STRVAR(frozenset_doc,
2223 "frozenset() -> empty frozenset object\n\
2224 frozenset(iterable) -> frozenset object\n\
2225 \n\
2226 Build an immutable unordered collection of unique elements.");
2227
2228 PyTypeObject PyFrozenSet_Type = {
2229 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2230 "frozenset", /* tp_name */
2231 sizeof(PySetObject), /* tp_basicsize */
2232 0, /* tp_itemsize */
2233 /* methods */
2234 (destructor)set_dealloc, /* tp_dealloc */
2235 0, /* tp_vectorcall_offset */
2236 0, /* tp_getattr */
2237 0, /* tp_setattr */
2238 0, /* tp_as_async */
2239 (reprfunc)set_repr, /* tp_repr */
2240 &frozenset_as_number, /* tp_as_number */
2241 &set_as_sequence, /* tp_as_sequence */
2242 0, /* tp_as_mapping */
2243 frozenset_hash, /* tp_hash */
2244 0, /* tp_call */
2245 0, /* tp_str */
2246 PyObject_GenericGetAttr, /* tp_getattro */
2247 0, /* tp_setattro */
2248 0, /* tp_as_buffer */
2249 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2250 Py_TPFLAGS_BASETYPE |
2251 _Py_TPFLAGS_MATCH_SELF, /* tp_flags */
2252 frozenset_doc, /* tp_doc */
2253 (traverseproc)set_traverse, /* tp_traverse */
2254 (inquiry)set_clear_internal, /* tp_clear */
2255 (richcmpfunc)set_richcompare, /* tp_richcompare */
2256 offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2257 (getiterfunc)set_iter, /* tp_iter */
2258 0, /* tp_iternext */
2259 frozenset_methods, /* tp_methods */
2260 0, /* tp_members */
2261 0, /* tp_getset */
2262 0, /* tp_base */
2263 0, /* tp_dict */
2264 0, /* tp_descr_get */
2265 0, /* tp_descr_set */
2266 0, /* tp_dictoffset */
2267 0, /* tp_init */
2268 PyType_GenericAlloc, /* tp_alloc */
2269 frozenset_new, /* tp_new */
2270 PyObject_GC_Del, /* tp_free */
2271 .tp_vectorcall = frozenset_vectorcall,
2272 };
2273
2274
2275 /***** C API functions *************************************************/
2276
2277 PyObject *
PySet_New(PyObject * iterable)2278 PySet_New(PyObject *iterable)
2279 {
2280 return make_new_set(&PySet_Type, iterable);
2281 }
2282
2283 PyObject *
PyFrozenSet_New(PyObject * iterable)2284 PyFrozenSet_New(PyObject *iterable)
2285 {
2286 return make_new_set(&PyFrozenSet_Type, iterable);
2287 }
2288
2289 Py_ssize_t
PySet_Size(PyObject * anyset)2290 PySet_Size(PyObject *anyset)
2291 {
2292 if (!PyAnySet_Check(anyset)) {
2293 PyErr_BadInternalCall();
2294 return -1;
2295 }
2296 return PySet_GET_SIZE(anyset);
2297 }
2298
2299 int
PySet_Clear(PyObject * set)2300 PySet_Clear(PyObject *set)
2301 {
2302 if (!PySet_Check(set)) {
2303 PyErr_BadInternalCall();
2304 return -1;
2305 }
2306 return set_clear_internal((PySetObject *)set);
2307 }
2308
2309 int
PySet_Contains(PyObject * anyset,PyObject * key)2310 PySet_Contains(PyObject *anyset, PyObject *key)
2311 {
2312 if (!PyAnySet_Check(anyset)) {
2313 PyErr_BadInternalCall();
2314 return -1;
2315 }
2316 return set_contains_key((PySetObject *)anyset, key);
2317 }
2318
2319 int
PySet_Discard(PyObject * set,PyObject * key)2320 PySet_Discard(PyObject *set, PyObject *key)
2321 {
2322 if (!PySet_Check(set)) {
2323 PyErr_BadInternalCall();
2324 return -1;
2325 }
2326 return set_discard_key((PySetObject *)set, key);
2327 }
2328
2329 int
PySet_Add(PyObject * anyset,PyObject * key)2330 PySet_Add(PyObject *anyset, PyObject *key)
2331 {
2332 if (!PySet_Check(anyset) &&
2333 (!PyFrozenSet_Check(anyset) || Py_REFCNT(anyset) != 1)) {
2334 PyErr_BadInternalCall();
2335 return -1;
2336 }
2337 return set_add_key((PySetObject *)anyset, key);
2338 }
2339
2340 int
_PySet_NextEntry(PyObject * set,Py_ssize_t * pos,PyObject ** key,Py_hash_t * hash)2341 _PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key, Py_hash_t *hash)
2342 {
2343 setentry *entry;
2344
2345 if (!PyAnySet_Check(set)) {
2346 PyErr_BadInternalCall();
2347 return -1;
2348 }
2349 if (set_next((PySetObject *)set, pos, &entry) == 0)
2350 return 0;
2351 *key = entry->key;
2352 *hash = entry->hash;
2353 return 1;
2354 }
2355
2356 PyObject *
PySet_Pop(PyObject * set)2357 PySet_Pop(PyObject *set)
2358 {
2359 if (!PySet_Check(set)) {
2360 PyErr_BadInternalCall();
2361 return NULL;
2362 }
2363 return set_pop((PySetObject *)set, NULL);
2364 }
2365
2366 int
_PySet_Update(PyObject * set,PyObject * iterable)2367 _PySet_Update(PyObject *set, PyObject *iterable)
2368 {
2369 if (!PySet_Check(set)) {
2370 PyErr_BadInternalCall();
2371 return -1;
2372 }
2373 return set_update_internal((PySetObject *)set, iterable);
2374 }
2375
2376 /* Exported for the gdb plugin's benefit. */
2377 PyObject *_PySet_Dummy = dummy;
2378
2379 #ifdef Py_DEBUG
2380
2381 /* Test code to be called with any three element set.
2382 Returns True and original set is restored. */
2383
2384 #define assertRaises(call_return_value, exception) \
2385 do { \
2386 assert(call_return_value); \
2387 assert(PyErr_ExceptionMatches(exception)); \
2388 PyErr_Clear(); \
2389 } while(0)
2390
2391 static PyObject *
test_c_api(PySetObject * so,PyObject * Py_UNUSED (ignored))2392 test_c_api(PySetObject *so, PyObject *Py_UNUSED(ignored))
2393 {
2394 Py_ssize_t count;
2395 const char *s;
2396 Py_ssize_t i;
2397 PyObject *elem=NULL, *dup=NULL, *t, *f, *dup2, *x=NULL;
2398 PyObject *ob = (PyObject *)so;
2399 Py_hash_t hash;
2400 PyObject *str;
2401
2402 /* Verify preconditions */
2403 assert(PyAnySet_Check(ob));
2404 assert(PyAnySet_CheckExact(ob));
2405 assert(!PyFrozenSet_CheckExact(ob));
2406
2407 /* so.clear(); so |= set("abc"); */
2408 str = PyUnicode_FromString("abc");
2409 if (str == NULL)
2410 return NULL;
2411 set_clear_internal(so);
2412 if (set_update_internal(so, str)) {
2413 Py_DECREF(str);
2414 return NULL;
2415 }
2416 Py_DECREF(str);
2417
2418 /* Exercise type/size checks */
2419 assert(PySet_Size(ob) == 3);
2420 assert(PySet_GET_SIZE(ob) == 3);
2421
2422 /* Raise TypeError for non-iterable constructor arguments */
2423 assertRaises(PySet_New(Py_None) == NULL, PyExc_TypeError);
2424 assertRaises(PyFrozenSet_New(Py_None) == NULL, PyExc_TypeError);
2425
2426 /* Raise TypeError for unhashable key */
2427 dup = PySet_New(ob);
2428 assertRaises(PySet_Discard(ob, dup) == -1, PyExc_TypeError);
2429 assertRaises(PySet_Contains(ob, dup) == -1, PyExc_TypeError);
2430 assertRaises(PySet_Add(ob, dup) == -1, PyExc_TypeError);
2431
2432 /* Exercise successful pop, contains, add, and discard */
2433 elem = PySet_Pop(ob);
2434 assert(PySet_Contains(ob, elem) == 0);
2435 assert(PySet_GET_SIZE(ob) == 2);
2436 assert(PySet_Add(ob, elem) == 0);
2437 assert(PySet_Contains(ob, elem) == 1);
2438 assert(PySet_GET_SIZE(ob) == 3);
2439 assert(PySet_Discard(ob, elem) == 1);
2440 assert(PySet_GET_SIZE(ob) == 2);
2441 assert(PySet_Discard(ob, elem) == 0);
2442 assert(PySet_GET_SIZE(ob) == 2);
2443
2444 /* Exercise clear */
2445 dup2 = PySet_New(dup);
2446 assert(PySet_Clear(dup2) == 0);
2447 assert(PySet_Size(dup2) == 0);
2448 Py_DECREF(dup2);
2449
2450 /* Raise SystemError on clear or update of frozen set */
2451 f = PyFrozenSet_New(dup);
2452 assertRaises(PySet_Clear(f) == -1, PyExc_SystemError);
2453 assertRaises(_PySet_Update(f, dup) == -1, PyExc_SystemError);
2454 assert(PySet_Add(f, elem) == 0);
2455 Py_INCREF(f);
2456 assertRaises(PySet_Add(f, elem) == -1, PyExc_SystemError);
2457 Py_DECREF(f);
2458 Py_DECREF(f);
2459
2460 /* Exercise direct iteration */
2461 i = 0, count = 0;
2462 while (_PySet_NextEntry((PyObject *)dup, &i, &x, &hash)) {
2463 s = PyUnicode_AsUTF8(x);
2464 assert(s && (s[0] == 'a' || s[0] == 'b' || s[0] == 'c'));
2465 count++;
2466 }
2467 assert(count == 3);
2468
2469 /* Exercise updates */
2470 dup2 = PySet_New(NULL);
2471 assert(_PySet_Update(dup2, dup) == 0);
2472 assert(PySet_Size(dup2) == 3);
2473 assert(_PySet_Update(dup2, dup) == 0);
2474 assert(PySet_Size(dup2) == 3);
2475 Py_DECREF(dup2);
2476
2477 /* Raise SystemError when self argument is not a set or frozenset. */
2478 t = PyTuple_New(0);
2479 assertRaises(PySet_Size(t) == -1, PyExc_SystemError);
2480 assertRaises(PySet_Contains(t, elem) == -1, PyExc_SystemError);
2481 Py_DECREF(t);
2482
2483 /* Raise SystemError when self argument is not a set. */
2484 f = PyFrozenSet_New(dup);
2485 assert(PySet_Size(f) == 3);
2486 assert(PyFrozenSet_CheckExact(f));
2487 assertRaises(PySet_Discard(f, elem) == -1, PyExc_SystemError);
2488 assertRaises(PySet_Pop(f) == NULL, PyExc_SystemError);
2489 Py_DECREF(f);
2490
2491 /* Raise KeyError when popping from an empty set */
2492 assert(PyNumber_InPlaceSubtract(ob, ob) == ob);
2493 Py_DECREF(ob);
2494 assert(PySet_GET_SIZE(ob) == 0);
2495 assertRaises(PySet_Pop(ob) == NULL, PyExc_KeyError);
2496
2497 /* Restore the set from the copy using the PyNumber API */
2498 assert(PyNumber_InPlaceOr(ob, dup) == ob);
2499 Py_DECREF(ob);
2500
2501 /* Verify constructors accept NULL arguments */
2502 f = PySet_New(NULL);
2503 assert(f != NULL);
2504 assert(PySet_GET_SIZE(f) == 0);
2505 Py_DECREF(f);
2506 f = PyFrozenSet_New(NULL);
2507 assert(f != NULL);
2508 assert(PyFrozenSet_CheckExact(f));
2509 assert(PySet_GET_SIZE(f) == 0);
2510 Py_DECREF(f);
2511
2512 Py_DECREF(elem);
2513 Py_DECREF(dup);
2514 Py_RETURN_TRUE;
2515 }
2516
2517 #undef assertRaises
2518
2519 #endif
2520
2521 /***** Dummy Struct *************************************************/
2522
2523 static PyObject *
dummy_repr(PyObject * op)2524 dummy_repr(PyObject *op)
2525 {
2526 return PyUnicode_FromString("<dummy key>");
2527 }
2528
2529 static void _Py_NO_RETURN
dummy_dealloc(PyObject * ignore)2530 dummy_dealloc(PyObject* ignore)
2531 {
2532 Py_FatalError("deallocating <dummy key>");
2533 }
2534
2535 static PyTypeObject _PySetDummy_Type = {
2536 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2537 "<dummy key> type",
2538 0,
2539 0,
2540 dummy_dealloc, /*tp_dealloc*/ /*never called*/
2541 0, /*tp_vectorcall_offset*/
2542 0, /*tp_getattr*/
2543 0, /*tp_setattr*/
2544 0, /*tp_as_async*/
2545 dummy_repr, /*tp_repr*/
2546 0, /*tp_as_number*/
2547 0, /*tp_as_sequence*/
2548 0, /*tp_as_mapping*/
2549 0, /*tp_hash */
2550 0, /*tp_call */
2551 0, /*tp_str */
2552 0, /*tp_getattro */
2553 0, /*tp_setattro */
2554 0, /*tp_as_buffer */
2555 Py_TPFLAGS_DEFAULT, /*tp_flags */
2556 };
2557
2558 static PyObject _dummy_struct = {
2559 _PyObject_EXTRA_INIT
2560 2, &_PySetDummy_Type
2561 };
2562