1 /* Dictionary object implementation using a hash table */
2
3 /* The distribution includes a separate file, Objects/dictnotes.txt,
4 describing explorations into dictionary design and optimization.
5 It covers typical dictionary use patterns, the parameters for
6 tuning dictionaries, and several ideas for possible optimizations.
7 */
8
9 /* PyDictKeysObject
10
11 This implements the dictionary's hashtable.
12
13 As of Python 3.6, this is compact and ordered. Basic idea is described here:
14 * https://mail.python.org/pipermail/python-dev/2012-December/123028.html
15 * https://morepypy.blogspot.com/2015/01/faster-more-memory-efficient-and-more.html
16
17 layout:
18
19 +---------------------+
20 | dk_refcnt |
21 | dk_log2_size |
22 | dk_log2_index_bytes |
23 | dk_kind |
24 | dk_usable |
25 | dk_nentries |
26 +---------------------+
27 | dk_indices[] |
28 | |
29 +---------------------+
30 | dk_entries[] |
31 | |
32 +---------------------+
33
34 dk_indices is actual hashtable. It holds index in entries, or DKIX_EMPTY(-1)
35 or DKIX_DUMMY(-2).
36 Size of indices is dk_size. Type of each index in indices is vary on dk_size:
37
38 * int8 for dk_size <= 128
39 * int16 for 256 <= dk_size <= 2**15
40 * int32 for 2**16 <= dk_size <= 2**31
41 * int64 for 2**32 <= dk_size
42
43 dk_entries is array of PyDictKeyEntry when dk_kind == DICT_KEYS_GENERAL or
44 PyDictUnicodeEntry otherwise. Its length is USABLE_FRACTION(dk_size).
45
46 NOTE: Since negative value is used for DKIX_EMPTY and DKIX_DUMMY, type of
47 dk_indices entry is signed integer and int16 is used for table which
48 dk_size == 256.
49 */
50
51
52 /*
53 The DictObject can be in one of two forms.
54
55 Either:
56 A combined table:
57 ma_values == NULL, dk_refcnt == 1.
58 Values are stored in the me_value field of the PyDictKeysObject.
59 Or:
60 A split table:
61 ma_values != NULL, dk_refcnt >= 1
62 Values are stored in the ma_values array.
63 Only string (unicode) keys are allowed.
64
65 There are four kinds of slots in the table (slot is index, and
66 DK_ENTRIES(keys)[index] if index >= 0):
67
68 1. Unused. index == DKIX_EMPTY
69 Does not hold an active (key, value) pair now and never did. Unused can
70 transition to Active upon key insertion. This is each slot's initial state.
71
72 2. Active. index >= 0, me_key != NULL and me_value != NULL
73 Holds an active (key, value) pair. Active can transition to Dummy or
74 Pending upon key deletion (for combined and split tables respectively).
75 This is the only case in which me_value != NULL.
76
77 3. Dummy. index == DKIX_DUMMY (combined only)
78 Previously held an active (key, value) pair, but that was deleted and an
79 active pair has not yet overwritten the slot. Dummy can transition to
80 Active upon key insertion. Dummy slots cannot be made Unused again
81 else the probe sequence in case of collision would have no way to know
82 they were once active.
83
84 4. Pending. index >= 0, key != NULL, and value == NULL (split only)
85 Not yet inserted in split-table.
86 */
87
88 /*
89 Preserving insertion order
90
91 It's simple for combined table. Since dk_entries is mostly append only, we can
92 get insertion order by just iterating dk_entries.
93
94 One exception is .popitem(). It removes last item in dk_entries and decrement
95 dk_nentries to achieve amortized O(1). Since there are DKIX_DUMMY remains in
96 dk_indices, we can't increment dk_usable even though dk_nentries is
97 decremented.
98
99 To preserve the order in a split table, a bit vector is used to record the
100 insertion order. When a key is inserted the bit vector is shifted up by 4 bits
101 and the index of the key is stored in the low 4 bits.
102 As a consequence of this, split keys have a maximum size of 16.
103 */
104
105 /* PyDict_MINSIZE is the starting size for any new dict.
106 * 8 allows dicts with no more than 5 active entries; experiments suggested
107 * this suffices for the majority of dicts (consisting mostly of usually-small
108 * dicts created to pass keyword arguments).
109 * Making this 8, rather than 4 reduces the number of resizes for most
110 * dictionaries, without any significant extra memory use.
111 */
112 #define PyDict_LOG_MINSIZE 3
113 #define PyDict_MINSIZE 8
114
115 #include "Python.h"
116 #include "pycore_bitutils.h" // _Py_bit_length
117 #include "pycore_call.h" // _PyObject_CallNoArgs()
118 #include "pycore_code.h" // stats
119 #include "pycore_dict.h" // PyDictKeysObject
120 #include "pycore_gc.h" // _PyObject_GC_IS_TRACKED()
121 #include "pycore_object.h" // _PyObject_GC_TRACK()
122 #include "pycore_pyerrors.h" // _PyErr_Fetch()
123 #include "pycore_pystate.h" // _PyThreadState_GET()
124 #include "stringlib/eq.h" // unicode_eq()
125
126 #include <stdbool.h>
127
128 /*[clinic input]
129 class dict "PyDictObject *" "&PyDict_Type"
130 [clinic start generated code]*/
131 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=f157a5a0ce9589d6]*/
132
133
134 /*
135 To ensure the lookup algorithm terminates, there must be at least one Unused
136 slot (NULL key) in the table.
137 To avoid slowing down lookups on a near-full table, we resize the table when
138 it's USABLE_FRACTION (currently two-thirds) full.
139 */
140
141 #define PERTURB_SHIFT 5
142
143 /*
144 Major subtleties ahead: Most hash schemes depend on having a "good" hash
145 function, in the sense of simulating randomness. Python doesn't: its most
146 important hash functions (for ints) are very regular in common
147 cases:
148
149 >>>[hash(i) for i in range(4)]
150 [0, 1, 2, 3]
151
152 This isn't necessarily bad! To the contrary, in a table of size 2**i, taking
153 the low-order i bits as the initial table index is extremely fast, and there
154 are no collisions at all for dicts indexed by a contiguous range of ints. So
155 this gives better-than-random behavior in common cases, and that's very
156 desirable.
157
158 OTOH, when collisions occur, the tendency to fill contiguous slices of the
159 hash table makes a good collision resolution strategy crucial. Taking only
160 the last i bits of the hash code is also vulnerable: for example, consider
161 the list [i << 16 for i in range(20000)] as a set of keys. Since ints are
162 their own hash codes, and this fits in a dict of size 2**15, the last 15 bits
163 of every hash code are all 0: they *all* map to the same table index.
164
165 But catering to unusual cases should not slow the usual ones, so we just take
166 the last i bits anyway. It's up to collision resolution to do the rest. If
167 we *usually* find the key we're looking for on the first try (and, it turns
168 out, we usually do -- the table load factor is kept under 2/3, so the odds
169 are solidly in our favor), then it makes best sense to keep the initial index
170 computation dirt cheap.
171
172 The first half of collision resolution is to visit table indices via this
173 recurrence:
174
175 j = ((5*j) + 1) mod 2**i
176
177 For any initial j in range(2**i), repeating that 2**i times generates each
178 int in range(2**i) exactly once (see any text on random-number generation for
179 proof). By itself, this doesn't help much: like linear probing (setting
180 j += 1, or j -= 1, on each loop trip), it scans the table entries in a fixed
181 order. This would be bad, except that's not the only thing we do, and it's
182 actually *good* in the common cases where hash keys are consecutive. In an
183 example that's really too small to make this entirely clear, for a table of
184 size 2**3 the order of indices is:
185
186 0 -> 1 -> 6 -> 7 -> 4 -> 5 -> 2 -> 3 -> 0 [and here it's repeating]
187
188 If two things come in at index 5, the first place we look after is index 2,
189 not 6, so if another comes in at index 6 the collision at 5 didn't hurt it.
190 Linear probing is deadly in this case because there the fixed probe order
191 is the *same* as the order consecutive keys are likely to arrive. But it's
192 extremely unlikely hash codes will follow a 5*j+1 recurrence by accident,
193 and certain that consecutive hash codes do not.
194
195 The other half of the strategy is to get the other bits of the hash code
196 into play. This is done by initializing a (unsigned) vrbl "perturb" to the
197 full hash code, and changing the recurrence to:
198
199 perturb >>= PERTURB_SHIFT;
200 j = (5*j) + 1 + perturb;
201 use j % 2**i as the next table index;
202
203 Now the probe sequence depends (eventually) on every bit in the hash code,
204 and the pseudo-scrambling property of recurring on 5*j+1 is more valuable,
205 because it quickly magnifies small differences in the bits that didn't affect
206 the initial index. Note that because perturb is unsigned, if the recurrence
207 is executed often enough perturb eventually becomes and remains 0. At that
208 point (very rarely reached) the recurrence is on (just) 5*j+1 again, and
209 that's certain to find an empty slot eventually (since it generates every int
210 in range(2**i), and we make sure there's always at least one empty slot).
211
212 Selecting a good value for PERTURB_SHIFT is a balancing act. You want it
213 small so that the high bits of the hash code continue to affect the probe
214 sequence across iterations; but you want it large so that in really bad cases
215 the high-order hash bits have an effect on early iterations. 5 was "the
216 best" in minimizing total collisions across experiments Tim Peters ran (on
217 both normal and pathological cases), but 4 and 6 weren't significantly worse.
218
219 Historical: Reimer Behrends contributed the idea of using a polynomial-based
220 approach, using repeated multiplication by x in GF(2**n) where an irreducible
221 polynomial for each table size was chosen such that x was a primitive root.
222 Christian Tismer later extended that to use division by x instead, as an
223 efficient way to get the high bits of the hash code into play. This scheme
224 also gave excellent collision statistics, but was more expensive: two
225 if-tests were required inside the loop; computing "the next" index took about
226 the same number of operations but without as much potential parallelism
227 (e.g., computing 5*j can go on at the same time as computing 1+perturb in the
228 above, and then shifting perturb can be done while the table index is being
229 masked); and the PyDictObject struct required a member to hold the table's
230 polynomial. In Tim's experiments the current scheme ran faster, produced
231 equally good collision statistics, needed less code & used less memory.
232
233 */
234
235 static int dictresize(PyDictObject *mp, uint8_t log_newsize, int unicode);
236
237 static PyObject* dict_iter(PyDictObject *dict);
238
239 /*Global counter used to set ma_version_tag field of dictionary.
240 * It is incremented each time that a dictionary is created and each
241 * time that a dictionary is modified. */
242 uint64_t _pydict_global_version = 0;
243
244 #include "clinic/dictobject.c.h"
245
246
247 #if PyDict_MAXFREELIST > 0
248 static struct _Py_dict_state *
get_dict_state(void)249 get_dict_state(void)
250 {
251 PyInterpreterState *interp = _PyInterpreterState_GET();
252 return &interp->dict_state;
253 }
254 #endif
255
256
257 void
_PyDict_ClearFreeList(PyInterpreterState * interp)258 _PyDict_ClearFreeList(PyInterpreterState *interp)
259 {
260 #if PyDict_MAXFREELIST > 0
261 struct _Py_dict_state *state = &interp->dict_state;
262 while (state->numfree) {
263 PyDictObject *op = state->free_list[--state->numfree];
264 assert(PyDict_CheckExact(op));
265 PyObject_GC_Del(op);
266 }
267 while (state->keys_numfree) {
268 PyObject_Free(state->keys_free_list[--state->keys_numfree]);
269 }
270 #endif
271 }
272
273
274 void
_PyDict_Fini(PyInterpreterState * interp)275 _PyDict_Fini(PyInterpreterState *interp)
276 {
277 _PyDict_ClearFreeList(interp);
278 #if defined(Py_DEBUG) && PyDict_MAXFREELIST > 0
279 struct _Py_dict_state *state = &interp->dict_state;
280 state->numfree = -1;
281 state->keys_numfree = -1;
282 #endif
283 }
284
285 static inline Py_hash_t
unicode_get_hash(PyObject * o)286 unicode_get_hash(PyObject *o)
287 {
288 assert(PyUnicode_CheckExact(o));
289 return _PyASCIIObject_CAST(o)->hash;
290 }
291
292 /* Print summary info about the state of the optimized allocator */
293 void
_PyDict_DebugMallocStats(FILE * out)294 _PyDict_DebugMallocStats(FILE *out)
295 {
296 #if PyDict_MAXFREELIST > 0
297 struct _Py_dict_state *state = get_dict_state();
298 _PyDebugAllocatorStats(out, "free PyDictObject",
299 state->numfree, sizeof(PyDictObject));
300 #endif
301 }
302
303 #define DK_MASK(dk) (DK_SIZE(dk)-1)
304
305 static void free_keys_object(PyDictKeysObject *keys);
306
307 static inline void
dictkeys_incref(PyDictKeysObject * dk)308 dictkeys_incref(PyDictKeysObject *dk)
309 {
310 #ifdef Py_REF_DEBUG
311 _Py_RefTotal++;
312 #endif
313 dk->dk_refcnt++;
314 }
315
316 static inline void
dictkeys_decref(PyDictKeysObject * dk)317 dictkeys_decref(PyDictKeysObject *dk)
318 {
319 assert(dk->dk_refcnt > 0);
320 #ifdef Py_REF_DEBUG
321 _Py_RefTotal--;
322 #endif
323 if (--dk->dk_refcnt == 0) {
324 free_keys_object(dk);
325 }
326 }
327
328 /* lookup indices. returns DKIX_EMPTY, DKIX_DUMMY, or ix >=0 */
329 static inline Py_ssize_t
dictkeys_get_index(const PyDictKeysObject * keys,Py_ssize_t i)330 dictkeys_get_index(const PyDictKeysObject *keys, Py_ssize_t i)
331 {
332 int log2size = DK_LOG_SIZE(keys);
333 Py_ssize_t ix;
334
335 if (log2size < 8) {
336 const int8_t *indices = (const int8_t*)(keys->dk_indices);
337 ix = indices[i];
338 }
339 else if (log2size < 16) {
340 const int16_t *indices = (const int16_t*)(keys->dk_indices);
341 ix = indices[i];
342 }
343 #if SIZEOF_VOID_P > 4
344 else if (log2size >= 32) {
345 const int64_t *indices = (const int64_t*)(keys->dk_indices);
346 ix = indices[i];
347 }
348 #endif
349 else {
350 const int32_t *indices = (const int32_t*)(keys->dk_indices);
351 ix = indices[i];
352 }
353 assert(ix >= DKIX_DUMMY);
354 return ix;
355 }
356
357 /* write to indices. */
358 static inline void
dictkeys_set_index(PyDictKeysObject * keys,Py_ssize_t i,Py_ssize_t ix)359 dictkeys_set_index(PyDictKeysObject *keys, Py_ssize_t i, Py_ssize_t ix)
360 {
361 int log2size = DK_LOG_SIZE(keys);
362
363 assert(ix >= DKIX_DUMMY);
364 assert(keys->dk_version == 0);
365
366 if (log2size < 8) {
367 int8_t *indices = (int8_t*)(keys->dk_indices);
368 assert(ix <= 0x7f);
369 indices[i] = (char)ix;
370 }
371 else if (log2size < 16) {
372 int16_t *indices = (int16_t*)(keys->dk_indices);
373 assert(ix <= 0x7fff);
374 indices[i] = (int16_t)ix;
375 }
376 #if SIZEOF_VOID_P > 4
377 else if (log2size >= 32) {
378 int64_t *indices = (int64_t*)(keys->dk_indices);
379 indices[i] = ix;
380 }
381 #endif
382 else {
383 int32_t *indices = (int32_t*)(keys->dk_indices);
384 assert(ix <= 0x7fffffff);
385 indices[i] = (int32_t)ix;
386 }
387 }
388
389
390 /* USABLE_FRACTION is the maximum dictionary load.
391 * Increasing this ratio makes dictionaries more dense resulting in more
392 * collisions. Decreasing it improves sparseness at the expense of spreading
393 * indices over more cache lines and at the cost of total memory consumed.
394 *
395 * USABLE_FRACTION must obey the following:
396 * (0 < USABLE_FRACTION(n) < n) for all n >= 2
397 *
398 * USABLE_FRACTION should be quick to calculate.
399 * Fractions around 1/2 to 2/3 seem to work well in practice.
400 */
401 #define USABLE_FRACTION(n) (((n) << 1)/3)
402
403 /* Find the smallest dk_size >= minsize. */
404 static inline uint8_t
calculate_log2_keysize(Py_ssize_t minsize)405 calculate_log2_keysize(Py_ssize_t minsize)
406 {
407 #if SIZEOF_LONG == SIZEOF_SIZE_T
408 minsize = (minsize | PyDict_MINSIZE) - 1;
409 return _Py_bit_length(minsize | (PyDict_MINSIZE-1));
410 #elif defined(_MSC_VER)
411 // On 64bit Windows, sizeof(long) == 4.
412 minsize = (minsize | PyDict_MINSIZE) - 1;
413 unsigned long msb;
414 _BitScanReverse64(&msb, (uint64_t)minsize);
415 return (uint8_t)(msb + 1);
416 #else
417 uint8_t log2_size;
418 for (log2_size = PyDict_LOG_MINSIZE;
419 (((Py_ssize_t)1) << log2_size) < minsize;
420 log2_size++)
421 ;
422 return log2_size;
423 #endif
424 }
425
426 /* estimate_keysize is reverse function of USABLE_FRACTION.
427 *
428 * This can be used to reserve enough size to insert n entries without
429 * resizing.
430 */
431 static inline uint8_t
estimate_log2_keysize(Py_ssize_t n)432 estimate_log2_keysize(Py_ssize_t n)
433 {
434 return calculate_log2_keysize((n*3 + 1) / 2);
435 }
436
437
438 /* GROWTH_RATE. Growth rate upon hitting maximum load.
439 * Currently set to used*3.
440 * This means that dicts double in size when growing without deletions,
441 * but have more head room when the number of deletions is on a par with the
442 * number of insertions. See also bpo-17563 and bpo-33205.
443 *
444 * GROWTH_RATE was set to used*4 up to version 3.2.
445 * GROWTH_RATE was set to used*2 in version 3.3.0
446 * GROWTH_RATE was set to used*2 + capacity/2 in 3.4.0-3.6.0.
447 */
448 #define GROWTH_RATE(d) ((d)->ma_used*3)
449
450 /* This immutable, empty PyDictKeysObject is used for PyDict_Clear()
451 * (which cannot fail and thus can do no allocation).
452 */
453 static PyDictKeysObject empty_keys_struct = {
454 1, /* dk_refcnt */
455 0, /* dk_log2_size */
456 0, /* dk_log2_index_bytes */
457 DICT_KEYS_UNICODE, /* dk_kind */
458 1, /* dk_version */
459 0, /* dk_usable (immutable) */
460 0, /* dk_nentries */
461 {DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY,
462 DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY}, /* dk_indices */
463 };
464
465 #define Py_EMPTY_KEYS &empty_keys_struct
466
467 /* Uncomment to check the dict content in _PyDict_CheckConsistency() */
468 // #define DEBUG_PYDICT
469
470 #ifdef DEBUG_PYDICT
471 # define ASSERT_CONSISTENT(op) assert(_PyDict_CheckConsistency((PyObject *)(op), 1))
472 #else
473 # define ASSERT_CONSISTENT(op) assert(_PyDict_CheckConsistency((PyObject *)(op), 0))
474 #endif
475
476 static inline int
get_index_from_order(PyDictObject * mp,Py_ssize_t i)477 get_index_from_order(PyDictObject *mp, Py_ssize_t i)
478 {
479 assert(mp->ma_used <= SHARED_KEYS_MAX_SIZE);
480 assert(i < (((char *)mp->ma_values)[-2]));
481 return ((char *)mp->ma_values)[-3-i];
482 }
483
484 #ifdef DEBUG_PYDICT
485 static void
dump_entries(PyDictKeysObject * dk)486 dump_entries(PyDictKeysObject *dk)
487 {
488 for (Py_ssize_t i = 0; i < dk->dk_nentries; i++) {
489 if (DK_IS_UNICODE(dk)) {
490 PyDictUnicodeEntry *ep = &DK_UNICODE_ENTRIES(dk)[i];
491 printf("key=%p value=%p\n", ep->me_key, ep->me_value);
492 }
493 else {
494 PyDictKeyEntry *ep = &DK_ENTRIES(dk)[i];
495 printf("key=%p hash=%lx value=%p\n", ep->me_key, ep->me_hash, ep->me_value);
496 }
497 }
498 }
499 #endif
500
501 int
_PyDict_CheckConsistency(PyObject * op,int check_content)502 _PyDict_CheckConsistency(PyObject *op, int check_content)
503 {
504 #define CHECK(expr) \
505 do { if (!(expr)) { _PyObject_ASSERT_FAILED_MSG(op, Py_STRINGIFY(expr)); } } while (0)
506
507 assert(op != NULL);
508 CHECK(PyDict_Check(op));
509 PyDictObject *mp = (PyDictObject *)op;
510
511 PyDictKeysObject *keys = mp->ma_keys;
512 int splitted = _PyDict_HasSplitTable(mp);
513 Py_ssize_t usable = USABLE_FRACTION(DK_SIZE(keys));
514
515 CHECK(0 <= mp->ma_used && mp->ma_used <= usable);
516 CHECK(0 <= keys->dk_usable && keys->dk_usable <= usable);
517 CHECK(0 <= keys->dk_nentries && keys->dk_nentries <= usable);
518 CHECK(keys->dk_usable + keys->dk_nentries <= usable);
519
520 if (!splitted) {
521 /* combined table */
522 CHECK(keys->dk_kind != DICT_KEYS_SPLIT);
523 CHECK(keys->dk_refcnt == 1 || keys == Py_EMPTY_KEYS);
524 }
525 else {
526 CHECK(keys->dk_kind == DICT_KEYS_SPLIT);
527 CHECK(mp->ma_used <= SHARED_KEYS_MAX_SIZE);
528 }
529
530 if (check_content) {
531 for (Py_ssize_t i=0; i < DK_SIZE(keys); i++) {
532 Py_ssize_t ix = dictkeys_get_index(keys, i);
533 CHECK(DKIX_DUMMY <= ix && ix <= usable);
534 }
535
536 if (keys->dk_kind == DICT_KEYS_GENERAL) {
537 PyDictKeyEntry *entries = DK_ENTRIES(keys);
538 for (Py_ssize_t i=0; i < usable; i++) {
539 PyDictKeyEntry *entry = &entries[i];
540 PyObject *key = entry->me_key;
541
542 if (key != NULL) {
543 /* test_dict fails if PyObject_Hash() is called again */
544 CHECK(entry->me_hash != -1);
545 CHECK(entry->me_value != NULL);
546
547 if (PyUnicode_CheckExact(key)) {
548 Py_hash_t hash = unicode_get_hash(key);
549 CHECK(entry->me_hash == hash);
550 }
551 }
552 }
553 }
554 else {
555 PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(keys);
556 for (Py_ssize_t i=0; i < usable; i++) {
557 PyDictUnicodeEntry *entry = &entries[i];
558 PyObject *key = entry->me_key;
559
560 if (key != NULL) {
561 CHECK(PyUnicode_CheckExact(key));
562 Py_hash_t hash = unicode_get_hash(key);
563 CHECK(hash != -1);
564 if (!splitted) {
565 CHECK(entry->me_value != NULL);
566 }
567 }
568
569 if (splitted) {
570 CHECK(entry->me_value == NULL);
571 }
572 }
573 }
574
575 if (splitted) {
576 CHECK(mp->ma_used <= SHARED_KEYS_MAX_SIZE);
577 /* splitted table */
578 int duplicate_check = 0;
579 for (Py_ssize_t i=0; i < mp->ma_used; i++) {
580 int index = get_index_from_order(mp, i);
581 CHECK((duplicate_check & (1<<index)) == 0);
582 duplicate_check |= (1<<index);
583 CHECK(mp->ma_values->values[index] != NULL);
584 }
585 }
586 }
587 return 1;
588
589 #undef CHECK
590 }
591
592
593 static PyDictKeysObject*
new_keys_object(uint8_t log2_size,bool unicode)594 new_keys_object(uint8_t log2_size, bool unicode)
595 {
596 PyDictKeysObject *dk;
597 Py_ssize_t usable;
598 int log2_bytes;
599 size_t entry_size = unicode ? sizeof(PyDictUnicodeEntry) : sizeof(PyDictKeyEntry);
600
601 assert(log2_size >= PyDict_LOG_MINSIZE);
602
603 usable = USABLE_FRACTION((size_t)1<<log2_size);
604 if (log2_size < 8) {
605 log2_bytes = log2_size;
606 }
607 else if (log2_size < 16) {
608 log2_bytes = log2_size + 1;
609 }
610 #if SIZEOF_VOID_P > 4
611 else if (log2_size >= 32) {
612 log2_bytes = log2_size + 3;
613 }
614 #endif
615 else {
616 log2_bytes = log2_size + 2;
617 }
618
619 #if PyDict_MAXFREELIST > 0
620 struct _Py_dict_state *state = get_dict_state();
621 #ifdef Py_DEBUG
622 // new_keys_object() must not be called after _PyDict_Fini()
623 assert(state->keys_numfree != -1);
624 #endif
625 if (log2_size == PyDict_LOG_MINSIZE && unicode && state->keys_numfree > 0) {
626 dk = state->keys_free_list[--state->keys_numfree];
627 OBJECT_STAT_INC(from_freelist);
628 }
629 else
630 #endif
631 {
632 dk = PyObject_Malloc(sizeof(PyDictKeysObject)
633 + ((size_t)1 << log2_bytes)
634 + entry_size * usable);
635 if (dk == NULL) {
636 PyErr_NoMemory();
637 return NULL;
638 }
639 }
640 #ifdef Py_REF_DEBUG
641 _Py_RefTotal++;
642 #endif
643 dk->dk_refcnt = 1;
644 dk->dk_log2_size = log2_size;
645 dk->dk_log2_index_bytes = log2_bytes;
646 dk->dk_kind = unicode ? DICT_KEYS_UNICODE : DICT_KEYS_GENERAL;
647 dk->dk_nentries = 0;
648 dk->dk_usable = usable;
649 dk->dk_version = 0;
650 memset(&dk->dk_indices[0], 0xff, ((size_t)1 << log2_bytes));
651 memset(&dk->dk_indices[(size_t)1 << log2_bytes], 0, entry_size * usable);
652 return dk;
653 }
654
655 static void
free_keys_object(PyDictKeysObject * keys)656 free_keys_object(PyDictKeysObject *keys)
657 {
658 assert(keys != Py_EMPTY_KEYS);
659 if (DK_IS_UNICODE(keys)) {
660 PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(keys);
661 Py_ssize_t i, n;
662 for (i = 0, n = keys->dk_nentries; i < n; i++) {
663 Py_XDECREF(entries[i].me_key);
664 Py_XDECREF(entries[i].me_value);
665 }
666 }
667 else {
668 PyDictKeyEntry *entries = DK_ENTRIES(keys);
669 Py_ssize_t i, n;
670 for (i = 0, n = keys->dk_nentries; i < n; i++) {
671 Py_XDECREF(entries[i].me_key);
672 Py_XDECREF(entries[i].me_value);
673 }
674 }
675 #if PyDict_MAXFREELIST > 0
676 struct _Py_dict_state *state = get_dict_state();
677 #ifdef Py_DEBUG
678 // free_keys_object() must not be called after _PyDict_Fini()
679 assert(state->keys_numfree != -1);
680 #endif
681 if (DK_LOG_SIZE(keys) == PyDict_LOG_MINSIZE
682 && state->keys_numfree < PyDict_MAXFREELIST
683 && DK_IS_UNICODE(keys)) {
684 state->keys_free_list[state->keys_numfree++] = keys;
685 OBJECT_STAT_INC(to_freelist);
686 return;
687 }
688 #endif
689 PyObject_Free(keys);
690 }
691
692 static inline PyDictValues*
new_values(Py_ssize_t size)693 new_values(Py_ssize_t size)
694 {
695 assert(size > 0);
696 size_t prefix_size = _Py_SIZE_ROUND_UP(size+2, sizeof(PyObject *));
697 assert(prefix_size < 256);
698 size_t n = prefix_size + size * sizeof(PyObject *);
699 uint8_t *mem = PyMem_Malloc(n);
700 if (mem == NULL) {
701 return NULL;
702 }
703 assert(prefix_size % sizeof(PyObject *) == 0);
704 mem[prefix_size-1] = (uint8_t)prefix_size;
705 return (PyDictValues*)(mem + prefix_size);
706 }
707
708 static inline void
free_values(PyDictValues * values)709 free_values(PyDictValues *values)
710 {
711 int prefix_size = ((uint8_t *)values)[-1];
712 PyMem_Free(((char *)values)-prefix_size);
713 }
714
715 /* Consumes a reference to the keys object */
716 static PyObject *
new_dict(PyDictKeysObject * keys,PyDictValues * values,Py_ssize_t used,int free_values_on_failure)717 new_dict(PyDictKeysObject *keys, PyDictValues *values, Py_ssize_t used, int free_values_on_failure)
718 {
719 PyDictObject *mp;
720 assert(keys != NULL);
721 #if PyDict_MAXFREELIST > 0
722 struct _Py_dict_state *state = get_dict_state();
723 #ifdef Py_DEBUG
724 // new_dict() must not be called after _PyDict_Fini()
725 assert(state->numfree != -1);
726 #endif
727 if (state->numfree) {
728 mp = state->free_list[--state->numfree];
729 assert (mp != NULL);
730 assert (Py_IS_TYPE(mp, &PyDict_Type));
731 OBJECT_STAT_INC(from_freelist);
732 _Py_NewReference((PyObject *)mp);
733 }
734 else
735 #endif
736 {
737 mp = PyObject_GC_New(PyDictObject, &PyDict_Type);
738 if (mp == NULL) {
739 dictkeys_decref(keys);
740 if (free_values_on_failure) {
741 free_values(values);
742 }
743 return NULL;
744 }
745 }
746 mp->ma_keys = keys;
747 mp->ma_values = values;
748 mp->ma_used = used;
749 mp->ma_version_tag = DICT_NEXT_VERSION();
750 ASSERT_CONSISTENT(mp);
751 return (PyObject *)mp;
752 }
753
754 static inline Py_ssize_t
shared_keys_usable_size(PyDictKeysObject * keys)755 shared_keys_usable_size(PyDictKeysObject *keys)
756 {
757 return keys->dk_nentries + keys->dk_usable;
758 }
759
760 /* Consumes a reference to the keys object */
761 static PyObject *
new_dict_with_shared_keys(PyDictKeysObject * keys)762 new_dict_with_shared_keys(PyDictKeysObject *keys)
763 {
764 PyDictValues *values;
765 Py_ssize_t i, size;
766
767 size = shared_keys_usable_size(keys);
768 values = new_values(size);
769 if (values == NULL) {
770 dictkeys_decref(keys);
771 return PyErr_NoMemory();
772 }
773 ((char *)values)[-2] = 0;
774 for (i = 0; i < size; i++) {
775 values->values[i] = NULL;
776 }
777 return new_dict(keys, values, 0, 1);
778 }
779
780
781 static PyDictKeysObject *
clone_combined_dict_keys(PyDictObject * orig)782 clone_combined_dict_keys(PyDictObject *orig)
783 {
784 assert(PyDict_Check(orig));
785 assert(Py_TYPE(orig)->tp_iter == (getiterfunc)dict_iter);
786 assert(orig->ma_values == NULL);
787 assert(orig->ma_keys->dk_refcnt == 1);
788
789 Py_ssize_t keys_size = _PyDict_KeysSize(orig->ma_keys);
790 PyDictKeysObject *keys = PyObject_Malloc(keys_size);
791 if (keys == NULL) {
792 PyErr_NoMemory();
793 return NULL;
794 }
795
796 memcpy(keys, orig->ma_keys, keys_size);
797
798 /* After copying key/value pairs, we need to incref all
799 keys and values and they are about to be co-owned by a
800 new dict object. */
801 PyObject **pkey, **pvalue;
802 size_t offs;
803 if (DK_IS_UNICODE(orig->ma_keys)) {
804 PyDictUnicodeEntry *ep0 = DK_UNICODE_ENTRIES(keys);
805 pkey = &ep0->me_key;
806 pvalue = &ep0->me_value;
807 offs = sizeof(PyDictUnicodeEntry) / sizeof(PyObject*);
808 }
809 else {
810 PyDictKeyEntry *ep0 = DK_ENTRIES(keys);
811 pkey = &ep0->me_key;
812 pvalue = &ep0->me_value;
813 offs = sizeof(PyDictKeyEntry) / sizeof(PyObject*);
814 }
815
816 Py_ssize_t n = keys->dk_nentries;
817 for (Py_ssize_t i = 0; i < n; i++) {
818 PyObject *value = *pvalue;
819 if (value != NULL) {
820 Py_INCREF(value);
821 Py_INCREF(*pkey);
822 }
823 pvalue += offs;
824 pkey += offs;
825 }
826
827 /* Since we copied the keys table we now have an extra reference
828 in the system. Manually call increment _Py_RefTotal to signal that
829 we have it now; calling dictkeys_incref would be an error as
830 keys->dk_refcnt is already set to 1 (after memcpy). */
831 #ifdef Py_REF_DEBUG
832 _Py_RefTotal++;
833 #endif
834 return keys;
835 }
836
837 PyObject *
PyDict_New(void)838 PyDict_New(void)
839 {
840 dictkeys_incref(Py_EMPTY_KEYS);
841 return new_dict(Py_EMPTY_KEYS, NULL, 0, 0);
842 }
843
844 /* Search index of hash table from offset of entry table */
845 static Py_ssize_t
lookdict_index(PyDictKeysObject * k,Py_hash_t hash,Py_ssize_t index)846 lookdict_index(PyDictKeysObject *k, Py_hash_t hash, Py_ssize_t index)
847 {
848 size_t mask = DK_MASK(k);
849 size_t perturb = (size_t)hash;
850 size_t i = (size_t)hash & mask;
851
852 for (;;) {
853 Py_ssize_t ix = dictkeys_get_index(k, i);
854 if (ix == index) {
855 return i;
856 }
857 if (ix == DKIX_EMPTY) {
858 return DKIX_EMPTY;
859 }
860 perturb >>= PERTURB_SHIFT;
861 i = mask & (i*5 + perturb + 1);
862 }
863 Py_UNREACHABLE();
864 }
865
866 // Search non-Unicode key from Unicode table
867 static Py_ssize_t
unicodekeys_lookup_generic(PyDictObject * mp,PyDictKeysObject * dk,PyObject * key,Py_hash_t hash)868 unicodekeys_lookup_generic(PyDictObject *mp, PyDictKeysObject* dk, PyObject *key, Py_hash_t hash)
869 {
870 PyDictUnicodeEntry *ep0 = DK_UNICODE_ENTRIES(dk);
871 size_t mask = DK_MASK(dk);
872 size_t perturb = hash;
873 size_t i = (size_t)hash & mask;
874 Py_ssize_t ix;
875 for (;;) {
876 ix = dictkeys_get_index(dk, i);
877 if (ix >= 0) {
878 PyDictUnicodeEntry *ep = &ep0[ix];
879 assert(ep->me_key != NULL);
880 assert(PyUnicode_CheckExact(ep->me_key));
881 if (ep->me_key == key) {
882 return ix;
883 }
884 if (unicode_get_hash(ep->me_key) == hash) {
885 PyObject *startkey = ep->me_key;
886 Py_INCREF(startkey);
887 int cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
888 Py_DECREF(startkey);
889 if (cmp < 0) {
890 return DKIX_ERROR;
891 }
892 if (dk == mp->ma_keys && ep->me_key == startkey) {
893 if (cmp > 0) {
894 return ix;
895 }
896 }
897 else {
898 /* The dict was mutated, restart */
899 return DKIX_KEY_CHANGED;
900 }
901 }
902 }
903 else if (ix == DKIX_EMPTY) {
904 return DKIX_EMPTY;
905 }
906 perturb >>= PERTURB_SHIFT;
907 i = mask & (i*5 + perturb + 1);
908 }
909 Py_UNREACHABLE();
910 }
911
912 // Search Unicode key from Unicode table.
913 static Py_ssize_t _Py_HOT_FUNCTION
unicodekeys_lookup_unicode(PyDictKeysObject * dk,PyObject * key,Py_hash_t hash)914 unicodekeys_lookup_unicode(PyDictKeysObject* dk, PyObject *key, Py_hash_t hash)
915 {
916 PyDictUnicodeEntry *ep0 = DK_UNICODE_ENTRIES(dk);
917 size_t mask = DK_MASK(dk);
918 size_t perturb = hash;
919 size_t i = (size_t)hash & mask;
920 Py_ssize_t ix;
921 for (;;) {
922 ix = dictkeys_get_index(dk, i);
923 if (ix >= 0) {
924 PyDictUnicodeEntry *ep = &ep0[ix];
925 assert(ep->me_key != NULL);
926 assert(PyUnicode_CheckExact(ep->me_key));
927 if (ep->me_key == key ||
928 (unicode_get_hash(ep->me_key) == hash && unicode_eq(ep->me_key, key))) {
929 return ix;
930 }
931 }
932 else if (ix == DKIX_EMPTY) {
933 return DKIX_EMPTY;
934 }
935 perturb >>= PERTURB_SHIFT;
936 i = mask & (i*5 + perturb + 1);
937 ix = dictkeys_get_index(dk, i);
938 if (ix >= 0) {
939 PyDictUnicodeEntry *ep = &ep0[ix];
940 assert(ep->me_key != NULL);
941 assert(PyUnicode_CheckExact(ep->me_key));
942 if (ep->me_key == key ||
943 (unicode_get_hash(ep->me_key) == hash && unicode_eq(ep->me_key, key))) {
944 return ix;
945 }
946 }
947 else if (ix == DKIX_EMPTY) {
948 return DKIX_EMPTY;
949 }
950 perturb >>= PERTURB_SHIFT;
951 i = mask & (i*5 + perturb + 1);
952 }
953 Py_UNREACHABLE();
954 }
955
956 // Search key from Generic table.
957 static Py_ssize_t
dictkeys_generic_lookup(PyDictObject * mp,PyDictKeysObject * dk,PyObject * key,Py_hash_t hash)958 dictkeys_generic_lookup(PyDictObject *mp, PyDictKeysObject* dk, PyObject *key, Py_hash_t hash)
959 {
960 PyDictKeyEntry *ep0 = DK_ENTRIES(dk);
961 size_t mask = DK_MASK(dk);
962 size_t perturb = hash;
963 size_t i = (size_t)hash & mask;
964 Py_ssize_t ix;
965 for (;;) {
966 ix = dictkeys_get_index(dk, i);
967 if (ix >= 0) {
968 PyDictKeyEntry *ep = &ep0[ix];
969 assert(ep->me_key != NULL);
970 if (ep->me_key == key) {
971 return ix;
972 }
973 if (ep->me_hash == hash) {
974 PyObject *startkey = ep->me_key;
975 Py_INCREF(startkey);
976 int cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
977 Py_DECREF(startkey);
978 if (cmp < 0) {
979 return DKIX_ERROR;
980 }
981 if (dk == mp->ma_keys && ep->me_key == startkey) {
982 if (cmp > 0) {
983 return ix;
984 }
985 }
986 else {
987 /* The dict was mutated, restart */
988 return DKIX_KEY_CHANGED;
989 }
990 }
991 }
992 else if (ix == DKIX_EMPTY) {
993 return DKIX_EMPTY;
994 }
995 perturb >>= PERTURB_SHIFT;
996 i = mask & (i*5 + perturb + 1);
997 }
998 Py_UNREACHABLE();
999 }
1000
1001 /* Lookup a string in a (all unicode) dict keys.
1002 * Returns DKIX_ERROR if key is not a string,
1003 * or if the dict keys is not all strings.
1004 * If the keys is present then return the index of key.
1005 * If the key is not present then return DKIX_EMPTY.
1006 */
1007 Py_ssize_t
_PyDictKeys_StringLookup(PyDictKeysObject * dk,PyObject * key)1008 _PyDictKeys_StringLookup(PyDictKeysObject* dk, PyObject *key)
1009 {
1010 DictKeysKind kind = dk->dk_kind;
1011 if (!PyUnicode_CheckExact(key) || kind == DICT_KEYS_GENERAL) {
1012 return DKIX_ERROR;
1013 }
1014 Py_hash_t hash = unicode_get_hash(key);
1015 if (hash == -1) {
1016 hash = PyUnicode_Type.tp_hash(key);
1017 if (hash == -1) {
1018 PyErr_Clear();
1019 return DKIX_ERROR;
1020 }
1021 }
1022 return unicodekeys_lookup_unicode(dk, key, hash);
1023 }
1024
1025 /*
1026 The basic lookup function used by all operations.
1027 This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
1028 Open addressing is preferred over chaining since the link overhead for
1029 chaining would be substantial (100% with typical malloc overhead).
1030
1031 The initial probe index is computed as hash mod the table size. Subsequent
1032 probe indices are computed as explained earlier.
1033
1034 All arithmetic on hash should ignore overflow.
1035
1036 _Py_dict_lookup() is general-purpose, and may return DKIX_ERROR if (and only if) a
1037 comparison raises an exception.
1038 When the key isn't found a DKIX_EMPTY is returned.
1039 */
1040 Py_ssize_t
_Py_dict_lookup(PyDictObject * mp,PyObject * key,Py_hash_t hash,PyObject ** value_addr)1041 _Py_dict_lookup(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **value_addr)
1042 {
1043 PyDictKeysObject *dk;
1044 DictKeysKind kind;
1045 Py_ssize_t ix;
1046
1047 start:
1048 dk = mp->ma_keys;
1049 kind = dk->dk_kind;
1050
1051 if (kind != DICT_KEYS_GENERAL) {
1052 if (PyUnicode_CheckExact(key)) {
1053 ix = unicodekeys_lookup_unicode(dk, key, hash);
1054 }
1055 else {
1056 ix = unicodekeys_lookup_generic(mp, dk, key, hash);
1057 if (ix == DKIX_KEY_CHANGED) {
1058 goto start;
1059 }
1060 }
1061
1062 if (ix >= 0) {
1063 if (kind == DICT_KEYS_SPLIT) {
1064 *value_addr = mp->ma_values->values[ix];
1065 }
1066 else {
1067 *value_addr = DK_UNICODE_ENTRIES(dk)[ix].me_value;
1068 }
1069 }
1070 else {
1071 *value_addr = NULL;
1072 }
1073 }
1074 else {
1075 ix = dictkeys_generic_lookup(mp, dk, key, hash);
1076 if (ix == DKIX_KEY_CHANGED) {
1077 goto start;
1078 }
1079 if (ix >= 0) {
1080 *value_addr = DK_ENTRIES(dk)[ix].me_value;
1081 }
1082 else {
1083 *value_addr = NULL;
1084 }
1085 }
1086
1087 return ix;
1088 }
1089
1090 int
_PyDict_HasOnlyStringKeys(PyObject * dict)1091 _PyDict_HasOnlyStringKeys(PyObject *dict)
1092 {
1093 Py_ssize_t pos = 0;
1094 PyObject *key, *value;
1095 assert(PyDict_Check(dict));
1096 /* Shortcut */
1097 if (((PyDictObject *)dict)->ma_keys->dk_kind != DICT_KEYS_GENERAL)
1098 return 1;
1099 while (PyDict_Next(dict, &pos, &key, &value))
1100 if (!PyUnicode_Check(key))
1101 return 0;
1102 return 1;
1103 }
1104
1105 #define MAINTAIN_TRACKING(mp, key, value) \
1106 do { \
1107 if (!_PyObject_GC_IS_TRACKED(mp)) { \
1108 if (_PyObject_GC_MAY_BE_TRACKED(key) || \
1109 _PyObject_GC_MAY_BE_TRACKED(value)) { \
1110 _PyObject_GC_TRACK(mp); \
1111 } \
1112 } \
1113 } while(0)
1114
1115 void
_PyDict_MaybeUntrack(PyObject * op)1116 _PyDict_MaybeUntrack(PyObject *op)
1117 {
1118 PyDictObject *mp;
1119 PyObject *value;
1120 Py_ssize_t i, numentries;
1121
1122 if (!PyDict_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op))
1123 return;
1124
1125 mp = (PyDictObject *) op;
1126 numentries = mp->ma_keys->dk_nentries;
1127 if (_PyDict_HasSplitTable(mp)) {
1128 for (i = 0; i < numentries; i++) {
1129 if ((value = mp->ma_values->values[i]) == NULL)
1130 continue;
1131 if (_PyObject_GC_MAY_BE_TRACKED(value)) {
1132 return;
1133 }
1134 }
1135 }
1136 else {
1137 if (DK_IS_UNICODE(mp->ma_keys)) {
1138 PyDictUnicodeEntry *ep0 = DK_UNICODE_ENTRIES(mp->ma_keys);
1139 for (i = 0; i < numentries; i++) {
1140 if ((value = ep0[i].me_value) == NULL)
1141 continue;
1142 if (_PyObject_GC_MAY_BE_TRACKED(value))
1143 return;
1144 }
1145 }
1146 else {
1147 PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys);
1148 for (i = 0; i < numentries; i++) {
1149 if ((value = ep0[i].me_value) == NULL)
1150 continue;
1151 if (_PyObject_GC_MAY_BE_TRACKED(value) ||
1152 _PyObject_GC_MAY_BE_TRACKED(ep0[i].me_key))
1153 return;
1154 }
1155 }
1156 }
1157 _PyObject_GC_UNTRACK(op);
1158 }
1159
1160 /* Internal function to find slot for an item from its hash
1161 when it is known that the key is not present in the dict.
1162
1163 The dict must be combined. */
1164 static Py_ssize_t
find_empty_slot(PyDictKeysObject * keys,Py_hash_t hash)1165 find_empty_slot(PyDictKeysObject *keys, Py_hash_t hash)
1166 {
1167 assert(keys != NULL);
1168
1169 const size_t mask = DK_MASK(keys);
1170 size_t i = hash & mask;
1171 Py_ssize_t ix = dictkeys_get_index(keys, i);
1172 for (size_t perturb = hash; ix >= 0;) {
1173 perturb >>= PERTURB_SHIFT;
1174 i = (i*5 + perturb + 1) & mask;
1175 ix = dictkeys_get_index(keys, i);
1176 }
1177 return i;
1178 }
1179
1180 static int
insertion_resize(PyDictObject * mp,int unicode)1181 insertion_resize(PyDictObject *mp, int unicode)
1182 {
1183 return dictresize(mp, calculate_log2_keysize(GROWTH_RATE(mp)), unicode);
1184 }
1185
1186 static Py_ssize_t
insert_into_dictkeys(PyDictKeysObject * keys,PyObject * name)1187 insert_into_dictkeys(PyDictKeysObject *keys, PyObject *name)
1188 {
1189 assert(PyUnicode_CheckExact(name));
1190 Py_hash_t hash = unicode_get_hash(name);
1191 if (hash == -1) {
1192 hash = PyUnicode_Type.tp_hash(name);
1193 if (hash == -1) {
1194 PyErr_Clear();
1195 return DKIX_EMPTY;
1196 }
1197 }
1198 Py_ssize_t ix = unicodekeys_lookup_unicode(keys, name, hash);
1199 if (ix == DKIX_EMPTY) {
1200 if (keys->dk_usable <= 0) {
1201 return DKIX_EMPTY;
1202 }
1203 Py_INCREF(name);
1204 /* Insert into new slot. */
1205 keys->dk_version = 0;
1206 Py_ssize_t hashpos = find_empty_slot(keys, hash);
1207 ix = keys->dk_nentries;
1208 PyDictUnicodeEntry *ep = &DK_UNICODE_ENTRIES(keys)[ix];
1209 dictkeys_set_index(keys, hashpos, ix);
1210 assert(ep->me_key == NULL);
1211 ep->me_key = name;
1212 keys->dk_usable--;
1213 keys->dk_nentries++;
1214 }
1215 assert (ix < SHARED_KEYS_MAX_SIZE);
1216 return ix;
1217 }
1218
1219 /*
1220 Internal routine to insert a new item into the table.
1221 Used both by the internal resize routine and by the public insert routine.
1222 Returns -1 if an error occurred, or 0 on success.
1223 Consumes key and value references.
1224 */
1225 static int
insertdict(PyDictObject * mp,PyObject * key,Py_hash_t hash,PyObject * value)1226 insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
1227 {
1228 PyObject *old_value;
1229
1230 if (DK_IS_UNICODE(mp->ma_keys) && !PyUnicode_CheckExact(key)) {
1231 if (insertion_resize(mp, 0) < 0)
1232 goto Fail;
1233 assert(mp->ma_keys->dk_kind == DICT_KEYS_GENERAL);
1234 }
1235
1236 Py_ssize_t ix = _Py_dict_lookup(mp, key, hash, &old_value);
1237 if (ix == DKIX_ERROR)
1238 goto Fail;
1239
1240 MAINTAIN_TRACKING(mp, key, value);
1241
1242 if (ix == DKIX_EMPTY) {
1243 /* Insert into new slot. */
1244 mp->ma_keys->dk_version = 0;
1245 assert(old_value == NULL);
1246 if (mp->ma_keys->dk_usable <= 0) {
1247 /* Need to resize. */
1248 if (insertion_resize(mp, 1) < 0)
1249 goto Fail;
1250 }
1251
1252 Py_ssize_t hashpos = find_empty_slot(mp->ma_keys, hash);
1253 dictkeys_set_index(mp->ma_keys, hashpos, mp->ma_keys->dk_nentries);
1254
1255 if (DK_IS_UNICODE(mp->ma_keys)) {
1256 PyDictUnicodeEntry *ep;
1257 ep = &DK_UNICODE_ENTRIES(mp->ma_keys)[mp->ma_keys->dk_nentries];
1258 ep->me_key = key;
1259 if (mp->ma_values) {
1260 Py_ssize_t index = mp->ma_keys->dk_nentries;
1261 _PyDictValues_AddToInsertionOrder(mp->ma_values, index);
1262 assert (mp->ma_values->values[index] == NULL);
1263 mp->ma_values->values[index] = value;
1264 }
1265 else {
1266 ep->me_value = value;
1267 }
1268 }
1269 else {
1270 PyDictKeyEntry *ep;
1271 ep = &DK_ENTRIES(mp->ma_keys)[mp->ma_keys->dk_nentries];
1272 ep->me_key = key;
1273 ep->me_hash = hash;
1274 ep->me_value = value;
1275 }
1276 mp->ma_used++;
1277 mp->ma_version_tag = DICT_NEXT_VERSION();
1278 mp->ma_keys->dk_usable--;
1279 mp->ma_keys->dk_nentries++;
1280 assert(mp->ma_keys->dk_usable >= 0);
1281 ASSERT_CONSISTENT(mp);
1282 return 0;
1283 }
1284
1285 if (old_value != value) {
1286 if (_PyDict_HasSplitTable(mp)) {
1287 mp->ma_values->values[ix] = value;
1288 if (old_value == NULL) {
1289 _PyDictValues_AddToInsertionOrder(mp->ma_values, ix);
1290 mp->ma_used++;
1291 }
1292 }
1293 else {
1294 assert(old_value != NULL);
1295 if (DK_IS_UNICODE(mp->ma_keys)) {
1296 DK_UNICODE_ENTRIES(mp->ma_keys)[ix].me_value = value;
1297 }
1298 else {
1299 DK_ENTRIES(mp->ma_keys)[ix].me_value = value;
1300 }
1301 }
1302 mp->ma_version_tag = DICT_NEXT_VERSION();
1303 }
1304 Py_XDECREF(old_value); /* which **CAN** re-enter (see issue #22653) */
1305 ASSERT_CONSISTENT(mp);
1306 Py_DECREF(key);
1307 return 0;
1308
1309 Fail:
1310 Py_DECREF(value);
1311 Py_DECREF(key);
1312 return -1;
1313 }
1314
1315 // Same to insertdict but specialized for ma_keys = Py_EMPTY_KEYS.
1316 // Consumes key and value references.
1317 static int
insert_to_emptydict(PyDictObject * mp,PyObject * key,Py_hash_t hash,PyObject * value)1318 insert_to_emptydict(PyDictObject *mp, PyObject *key, Py_hash_t hash,
1319 PyObject *value)
1320 {
1321 assert(mp->ma_keys == Py_EMPTY_KEYS);
1322
1323 int unicode = PyUnicode_CheckExact(key);
1324 PyDictKeysObject *newkeys = new_keys_object(PyDict_LOG_MINSIZE, unicode);
1325 if (newkeys == NULL) {
1326 Py_DECREF(key);
1327 Py_DECREF(value);
1328 return -1;
1329 }
1330 dictkeys_decref(Py_EMPTY_KEYS);
1331 mp->ma_keys = newkeys;
1332 mp->ma_values = NULL;
1333
1334 MAINTAIN_TRACKING(mp, key, value);
1335
1336 size_t hashpos = (size_t)hash & (PyDict_MINSIZE-1);
1337 dictkeys_set_index(mp->ma_keys, hashpos, 0);
1338 if (unicode) {
1339 PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(mp->ma_keys);
1340 ep->me_key = key;
1341 ep->me_value = value;
1342 }
1343 else {
1344 PyDictKeyEntry *ep = DK_ENTRIES(mp->ma_keys);
1345 ep->me_key = key;
1346 ep->me_hash = hash;
1347 ep->me_value = value;
1348 }
1349 mp->ma_used++;
1350 mp->ma_version_tag = DICT_NEXT_VERSION();
1351 mp->ma_keys->dk_usable--;
1352 mp->ma_keys->dk_nentries++;
1353 return 0;
1354 }
1355
1356 /*
1357 Internal routine used by dictresize() to build a hashtable of entries.
1358 */
1359 static void
build_indices_generic(PyDictKeysObject * keys,PyDictKeyEntry * ep,Py_ssize_t n)1360 build_indices_generic(PyDictKeysObject *keys, PyDictKeyEntry *ep, Py_ssize_t n)
1361 {
1362 size_t mask = DK_MASK(keys);
1363 for (Py_ssize_t ix = 0; ix != n; ix++, ep++) {
1364 Py_hash_t hash = ep->me_hash;
1365 size_t i = hash & mask;
1366 for (size_t perturb = hash; dictkeys_get_index(keys, i) != DKIX_EMPTY;) {
1367 perturb >>= PERTURB_SHIFT;
1368 i = mask & (i*5 + perturb + 1);
1369 }
1370 dictkeys_set_index(keys, i, ix);
1371 }
1372 }
1373
1374 static void
build_indices_unicode(PyDictKeysObject * keys,PyDictUnicodeEntry * ep,Py_ssize_t n)1375 build_indices_unicode(PyDictKeysObject *keys, PyDictUnicodeEntry *ep, Py_ssize_t n)
1376 {
1377 size_t mask = DK_MASK(keys);
1378 for (Py_ssize_t ix = 0; ix != n; ix++, ep++) {
1379 Py_hash_t hash = unicode_get_hash(ep->me_key);
1380 assert(hash != -1);
1381 size_t i = hash & mask;
1382 for (size_t perturb = hash; dictkeys_get_index(keys, i) != DKIX_EMPTY;) {
1383 perturb >>= PERTURB_SHIFT;
1384 i = mask & (i*5 + perturb + 1);
1385 }
1386 dictkeys_set_index(keys, i, ix);
1387 }
1388 }
1389
1390 /*
1391 Restructure the table by allocating a new table and reinserting all
1392 items again. When entries have been deleted, the new table may
1393 actually be smaller than the old one.
1394 If a table is split (its keys and hashes are shared, its values are not),
1395 then the values are temporarily copied into the table, it is resized as
1396 a combined table, then the me_value slots in the old table are NULLed out.
1397 After resizing a table is always combined.
1398
1399 This function supports:
1400 - Unicode split -> Unicode combined or Generic
1401 - Unicode combined -> Unicode combined or Generic
1402 - Generic -> Generic
1403 */
1404 static int
dictresize(PyDictObject * mp,uint8_t log2_newsize,int unicode)1405 dictresize(PyDictObject *mp, uint8_t log2_newsize, int unicode)
1406 {
1407 PyDictKeysObject *oldkeys;
1408 PyDictValues *oldvalues;
1409
1410 if (log2_newsize >= SIZEOF_SIZE_T*8) {
1411 PyErr_NoMemory();
1412 return -1;
1413 }
1414 assert(log2_newsize >= PyDict_LOG_MINSIZE);
1415
1416 oldkeys = mp->ma_keys;
1417 oldvalues = mp->ma_values;
1418
1419 if (!DK_IS_UNICODE(oldkeys)) {
1420 unicode = 0;
1421 }
1422
1423 /* NOTE: Current odict checks mp->ma_keys to detect resize happen.
1424 * So we can't reuse oldkeys even if oldkeys->dk_size == newsize.
1425 * TODO: Try reusing oldkeys when reimplement odict.
1426 */
1427
1428 /* Allocate a new table. */
1429 mp->ma_keys = new_keys_object(log2_newsize, unicode);
1430 if (mp->ma_keys == NULL) {
1431 mp->ma_keys = oldkeys;
1432 return -1;
1433 }
1434 // New table must be large enough.
1435 assert(mp->ma_keys->dk_usable >= mp->ma_used);
1436
1437 Py_ssize_t numentries = mp->ma_used;
1438
1439 if (oldvalues != NULL) {
1440 PyDictUnicodeEntry *oldentries = DK_UNICODE_ENTRIES(oldkeys);
1441 /* Convert split table into new combined table.
1442 * We must incref keys; we can transfer values.
1443 */
1444 if (mp->ma_keys->dk_kind == DICT_KEYS_GENERAL) {
1445 // split -> generic
1446 PyDictKeyEntry *newentries = DK_ENTRIES(mp->ma_keys);
1447
1448 for (Py_ssize_t i = 0; i < numentries; i++) {
1449 int index = get_index_from_order(mp, i);
1450 PyDictUnicodeEntry *ep = &oldentries[index];
1451 assert(oldvalues->values[index] != NULL);
1452 Py_INCREF(ep->me_key);
1453 newentries[i].me_key = ep->me_key;
1454 newentries[i].me_hash = unicode_get_hash(ep->me_key);
1455 newentries[i].me_value = oldvalues->values[index];
1456 }
1457 build_indices_generic(mp->ma_keys, newentries, numentries);
1458 }
1459 else { // split -> combined unicode
1460 PyDictUnicodeEntry *newentries = DK_UNICODE_ENTRIES(mp->ma_keys);
1461
1462 for (Py_ssize_t i = 0; i < numentries; i++) {
1463 int index = get_index_from_order(mp, i);
1464 PyDictUnicodeEntry *ep = &oldentries[index];
1465 assert(oldvalues->values[index] != NULL);
1466 Py_INCREF(ep->me_key);
1467 newentries[i].me_key = ep->me_key;
1468 newentries[i].me_value = oldvalues->values[index];
1469 }
1470 build_indices_unicode(mp->ma_keys, newentries, numentries);
1471 }
1472 dictkeys_decref(oldkeys);
1473 mp->ma_values = NULL;
1474 free_values(oldvalues);
1475 }
1476 else { // oldkeys is combined.
1477 if (oldkeys->dk_kind == DICT_KEYS_GENERAL) {
1478 // generic -> generic
1479 assert(mp->ma_keys->dk_kind == DICT_KEYS_GENERAL);
1480 PyDictKeyEntry *oldentries = DK_ENTRIES(oldkeys);
1481 PyDictKeyEntry *newentries = DK_ENTRIES(mp->ma_keys);
1482 if (oldkeys->dk_nentries == numentries) {
1483 memcpy(newentries, oldentries, numentries * sizeof(PyDictKeyEntry));
1484 }
1485 else {
1486 PyDictKeyEntry *ep = oldentries;
1487 for (Py_ssize_t i = 0; i < numentries; i++) {
1488 while (ep->me_value == NULL)
1489 ep++;
1490 newentries[i] = *ep++;
1491 }
1492 }
1493 build_indices_generic(mp->ma_keys, newentries, numentries);
1494 }
1495 else { // oldkeys is combined unicode
1496 PyDictUnicodeEntry *oldentries = DK_UNICODE_ENTRIES(oldkeys);
1497 if (unicode) { // combined unicode -> combined unicode
1498 PyDictUnicodeEntry *newentries = DK_UNICODE_ENTRIES(mp->ma_keys);
1499 if (oldkeys->dk_nentries == numentries && mp->ma_keys->dk_kind == DICT_KEYS_UNICODE) {
1500 memcpy(newentries, oldentries, numentries * sizeof(PyDictUnicodeEntry));
1501 }
1502 else {
1503 PyDictUnicodeEntry *ep = oldentries;
1504 for (Py_ssize_t i = 0; i < numentries; i++) {
1505 while (ep->me_value == NULL)
1506 ep++;
1507 newentries[i] = *ep++;
1508 }
1509 }
1510 build_indices_unicode(mp->ma_keys, newentries, numentries);
1511 }
1512 else { // combined unicode -> generic
1513 PyDictKeyEntry *newentries = DK_ENTRIES(mp->ma_keys);
1514 PyDictUnicodeEntry *ep = oldentries;
1515 for (Py_ssize_t i = 0; i < numentries; i++) {
1516 while (ep->me_value == NULL)
1517 ep++;
1518 newentries[i].me_key = ep->me_key;
1519 newentries[i].me_hash = unicode_get_hash(ep->me_key);
1520 newentries[i].me_value = ep->me_value;
1521 ep++;
1522 }
1523 build_indices_generic(mp->ma_keys, newentries, numentries);
1524 }
1525 }
1526
1527 // We can not use free_keys_object here because key's reference
1528 // are moved already.
1529 #ifdef Py_REF_DEBUG
1530 _Py_RefTotal--;
1531 #endif
1532 if (oldkeys == Py_EMPTY_KEYS) {
1533 oldkeys->dk_refcnt--;
1534 assert(oldkeys->dk_refcnt > 0);
1535 }
1536 else {
1537 assert(oldkeys->dk_kind != DICT_KEYS_SPLIT);
1538 assert(oldkeys->dk_refcnt == 1);
1539 #if PyDict_MAXFREELIST > 0
1540 struct _Py_dict_state *state = get_dict_state();
1541 #ifdef Py_DEBUG
1542 // dictresize() must not be called after _PyDict_Fini()
1543 assert(state->keys_numfree != -1);
1544 #endif
1545 if (DK_LOG_SIZE(oldkeys) == PyDict_LOG_MINSIZE &&
1546 DK_IS_UNICODE(oldkeys) &&
1547 state->keys_numfree < PyDict_MAXFREELIST)
1548 {
1549 state->keys_free_list[state->keys_numfree++] = oldkeys;
1550 OBJECT_STAT_INC(to_freelist);
1551 }
1552 else
1553 #endif
1554 {
1555 PyObject_Free(oldkeys);
1556 }
1557 }
1558 }
1559
1560 mp->ma_keys->dk_usable -= numentries;
1561 mp->ma_keys->dk_nentries = numentries;
1562 ASSERT_CONSISTENT(mp);
1563 return 0;
1564 }
1565
1566 static PyObject *
dict_new_presized(Py_ssize_t minused,bool unicode)1567 dict_new_presized(Py_ssize_t minused, bool unicode)
1568 {
1569 const uint8_t log2_max_presize = 17;
1570 const Py_ssize_t max_presize = ((Py_ssize_t)1) << log2_max_presize;
1571 uint8_t log2_newsize;
1572 PyDictKeysObject *new_keys;
1573
1574 if (minused <= USABLE_FRACTION(PyDict_MINSIZE)) {
1575 return PyDict_New();
1576 }
1577 /* There are no strict guarantee that returned dict can contain minused
1578 * items without resize. So we create medium size dict instead of very
1579 * large dict or MemoryError.
1580 */
1581 if (minused > USABLE_FRACTION(max_presize)) {
1582 log2_newsize = log2_max_presize;
1583 }
1584 else {
1585 log2_newsize = estimate_log2_keysize(minused);
1586 }
1587
1588 new_keys = new_keys_object(log2_newsize, unicode);
1589 if (new_keys == NULL)
1590 return NULL;
1591 return new_dict(new_keys, NULL, 0, 0);
1592 }
1593
1594 PyObject *
_PyDict_NewPresized(Py_ssize_t minused)1595 _PyDict_NewPresized(Py_ssize_t minused)
1596 {
1597 return dict_new_presized(minused, false);
1598 }
1599
1600 PyObject *
_PyDict_FromItems(PyObject * const * keys,Py_ssize_t keys_offset,PyObject * const * values,Py_ssize_t values_offset,Py_ssize_t length)1601 _PyDict_FromItems(PyObject *const *keys, Py_ssize_t keys_offset,
1602 PyObject *const *values, Py_ssize_t values_offset,
1603 Py_ssize_t length)
1604 {
1605 bool unicode = true;
1606 PyObject *const *ks = keys;
1607
1608 for (Py_ssize_t i = 0; i < length; i++) {
1609 if (!PyUnicode_CheckExact(*ks)) {
1610 unicode = false;
1611 break;
1612 }
1613 ks += keys_offset;
1614 }
1615
1616 PyObject *dict = dict_new_presized(length, unicode);
1617 if (dict == NULL) {
1618 return NULL;
1619 }
1620
1621 ks = keys;
1622 PyObject *const *vs = values;
1623
1624 for (Py_ssize_t i = 0; i < length; i++) {
1625 PyObject *key = *ks;
1626 PyObject *value = *vs;
1627 if (PyDict_SetItem(dict, key, value) < 0) {
1628 Py_DECREF(dict);
1629 return NULL;
1630 }
1631 ks += keys_offset;
1632 vs += values_offset;
1633 }
1634
1635 return dict;
1636 }
1637
1638 /* Note that, for historical reasons, PyDict_GetItem() suppresses all errors
1639 * that may occur (originally dicts supported only string keys, and exceptions
1640 * weren't possible). So, while the original intent was that a NULL return
1641 * meant the key wasn't present, in reality it can mean that, or that an error
1642 * (suppressed) occurred while computing the key's hash, or that some error
1643 * (suppressed) occurred when comparing keys in the dict's internal probe
1644 * sequence. A nasty example of the latter is when a Python-coded comparison
1645 * function hits a stack-depth error, which can cause this to return NULL
1646 * even if the key is present.
1647 */
1648 PyObject *
PyDict_GetItem(PyObject * op,PyObject * key)1649 PyDict_GetItem(PyObject *op, PyObject *key)
1650 {
1651 if (!PyDict_Check(op)) {
1652 return NULL;
1653 }
1654 PyDictObject *mp = (PyDictObject *)op;
1655
1656 Py_hash_t hash;
1657 if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
1658 hash = PyObject_Hash(key);
1659 if (hash == -1) {
1660 PyErr_Clear();
1661 return NULL;
1662 }
1663 }
1664
1665 PyThreadState *tstate = _PyThreadState_GET();
1666 #ifdef Py_DEBUG
1667 // bpo-40839: Before Python 3.10, it was possible to call PyDict_GetItem()
1668 // with the GIL released.
1669 _Py_EnsureTstateNotNULL(tstate);
1670 #endif
1671
1672 /* Preserve the existing exception */
1673 PyObject *exc_type, *exc_value, *exc_tb;
1674 PyObject *value;
1675 Py_ssize_t ix; (void)ix;
1676
1677 _PyErr_Fetch(tstate, &exc_type, &exc_value, &exc_tb);
1678 ix = _Py_dict_lookup(mp, key, hash, &value);
1679
1680 /* Ignore any exception raised by the lookup */
1681 _PyErr_Restore(tstate, exc_type, exc_value, exc_tb);
1682
1683
1684 assert(ix >= 0 || value == NULL);
1685 return value;
1686 }
1687
1688 Py_ssize_t
_PyDict_GetItemHint(PyDictObject * mp,PyObject * key,Py_ssize_t hint,PyObject ** value)1689 _PyDict_GetItemHint(PyDictObject *mp, PyObject *key,
1690 Py_ssize_t hint, PyObject **value)
1691 {
1692 assert(*value == NULL);
1693 assert(PyDict_CheckExact((PyObject*)mp));
1694 assert(PyUnicode_CheckExact(key));
1695
1696 if (hint >= 0 && hint < mp->ma_keys->dk_nentries) {
1697 PyObject *res = NULL;
1698
1699 if (DK_IS_UNICODE(mp->ma_keys)) {
1700 PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(mp->ma_keys) + (size_t)hint;
1701 if (ep->me_key == key) {
1702 if (mp->ma_keys->dk_kind == DICT_KEYS_SPLIT) {
1703 assert(mp->ma_values != NULL);
1704 res = mp->ma_values->values[(size_t)hint];
1705 }
1706 else {
1707 res = ep->me_value;
1708 }
1709 if (res != NULL) {
1710 *value = res;
1711 return hint;
1712 }
1713 }
1714 }
1715 else {
1716 PyDictKeyEntry *ep = DK_ENTRIES(mp->ma_keys) + (size_t)hint;
1717 if (ep->me_key == key) {
1718 if (mp->ma_keys->dk_kind == DICT_KEYS_SPLIT) {
1719 assert(mp->ma_values != NULL);
1720 res = mp->ma_values->values[(size_t)hint];
1721 }
1722 else {
1723 res = ep->me_value;
1724 }
1725 if (res != NULL) {
1726 *value = res;
1727 return hint;
1728 }
1729 }
1730 }
1731 }
1732
1733 Py_hash_t hash = unicode_get_hash(key);
1734 if (hash == -1) {
1735 hash = PyObject_Hash(key);
1736 if (hash == -1) {
1737 return -1;
1738 }
1739 }
1740
1741 return _Py_dict_lookup(mp, key, hash, value);
1742 }
1743
1744 /* Same as PyDict_GetItemWithError() but with hash supplied by caller.
1745 This returns NULL *with* an exception set if an exception occurred.
1746 It returns NULL *without* an exception set if the key wasn't present.
1747 */
1748 PyObject *
_PyDict_GetItem_KnownHash(PyObject * op,PyObject * key,Py_hash_t hash)1749 _PyDict_GetItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)
1750 {
1751 Py_ssize_t ix; (void)ix;
1752 PyDictObject *mp = (PyDictObject *)op;
1753 PyObject *value;
1754
1755 if (!PyDict_Check(op)) {
1756 PyErr_BadInternalCall();
1757 return NULL;
1758 }
1759
1760 ix = _Py_dict_lookup(mp, key, hash, &value);
1761 assert(ix >= 0 || value == NULL);
1762 return value;
1763 }
1764
1765 /* Variant of PyDict_GetItem() that doesn't suppress exceptions.
1766 This returns NULL *with* an exception set if an exception occurred.
1767 It returns NULL *without* an exception set if the key wasn't present.
1768 */
1769 PyObject *
PyDict_GetItemWithError(PyObject * op,PyObject * key)1770 PyDict_GetItemWithError(PyObject *op, PyObject *key)
1771 {
1772 Py_ssize_t ix; (void)ix;
1773 Py_hash_t hash;
1774 PyDictObject*mp = (PyDictObject *)op;
1775 PyObject *value;
1776
1777 if (!PyDict_Check(op)) {
1778 PyErr_BadInternalCall();
1779 return NULL;
1780 }
1781 if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1)
1782 {
1783 hash = PyObject_Hash(key);
1784 if (hash == -1) {
1785 return NULL;
1786 }
1787 }
1788
1789 ix = _Py_dict_lookup(mp, key, hash, &value);
1790 assert(ix >= 0 || value == NULL);
1791 return value;
1792 }
1793
1794 PyObject *
_PyDict_GetItemWithError(PyObject * dp,PyObject * kv)1795 _PyDict_GetItemWithError(PyObject *dp, PyObject *kv)
1796 {
1797 assert(PyUnicode_CheckExact(kv));
1798 Py_hash_t hash = kv->ob_type->tp_hash(kv);
1799 if (hash == -1) {
1800 return NULL;
1801 }
1802 return _PyDict_GetItem_KnownHash(dp, kv, hash);
1803 }
1804
1805 PyObject *
_PyDict_GetItemIdWithError(PyObject * dp,_Py_Identifier * key)1806 _PyDict_GetItemIdWithError(PyObject *dp, _Py_Identifier *key)
1807 {
1808 PyObject *kv;
1809 kv = _PyUnicode_FromId(key); /* borrowed */
1810 if (kv == NULL)
1811 return NULL;
1812 Py_hash_t hash = unicode_get_hash(kv);
1813 assert (hash != -1); /* interned strings have their hash value initialised */
1814 return _PyDict_GetItem_KnownHash(dp, kv, hash);
1815 }
1816
1817 PyObject *
_PyDict_GetItemStringWithError(PyObject * v,const char * key)1818 _PyDict_GetItemStringWithError(PyObject *v, const char *key)
1819 {
1820 PyObject *kv, *rv;
1821 kv = PyUnicode_FromString(key);
1822 if (kv == NULL) {
1823 return NULL;
1824 }
1825 rv = PyDict_GetItemWithError(v, kv);
1826 Py_DECREF(kv);
1827 return rv;
1828 }
1829
1830 /* Fast version of global value lookup (LOAD_GLOBAL).
1831 * Lookup in globals, then builtins.
1832 *
1833 *
1834 *
1835 *
1836 * Raise an exception and return NULL if an error occurred (ex: computing the
1837 * key hash failed, key comparison failed, ...). Return NULL if the key doesn't
1838 * exist. Return the value if the key exists.
1839 */
1840 PyObject *
_PyDict_LoadGlobal(PyDictObject * globals,PyDictObject * builtins,PyObject * key)1841 _PyDict_LoadGlobal(PyDictObject *globals, PyDictObject *builtins, PyObject *key)
1842 {
1843 Py_ssize_t ix;
1844 Py_hash_t hash;
1845 PyObject *value;
1846
1847 if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
1848 hash = PyObject_Hash(key);
1849 if (hash == -1)
1850 return NULL;
1851 }
1852
1853 /* namespace 1: globals */
1854 ix = _Py_dict_lookup(globals, key, hash, &value);
1855 if (ix == DKIX_ERROR)
1856 return NULL;
1857 if (ix != DKIX_EMPTY && value != NULL)
1858 return value;
1859
1860 /* namespace 2: builtins */
1861 ix = _Py_dict_lookup(builtins, key, hash, &value);
1862 assert(ix >= 0 || value == NULL);
1863 return value;
1864 }
1865
1866 /* Consumes references to key and value */
1867 int
_PyDict_SetItem_Take2(PyDictObject * mp,PyObject * key,PyObject * value)1868 _PyDict_SetItem_Take2(PyDictObject *mp, PyObject *key, PyObject *value)
1869 {
1870 assert(key);
1871 assert(value);
1872 assert(PyDict_Check(mp));
1873 Py_hash_t hash;
1874 if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
1875 hash = PyObject_Hash(key);
1876 if (hash == -1) {
1877 Py_DECREF(key);
1878 Py_DECREF(value);
1879 return -1;
1880 }
1881 }
1882 if (mp->ma_keys == Py_EMPTY_KEYS) {
1883 return insert_to_emptydict(mp, key, hash, value);
1884 }
1885 /* insertdict() handles any resizing that might be necessary */
1886 return insertdict(mp, key, hash, value);
1887 }
1888
1889 /* CAUTION: PyDict_SetItem() must guarantee that it won't resize the
1890 * dictionary if it's merely replacing the value for an existing key.
1891 * This means that it's safe to loop over a dictionary with PyDict_Next()
1892 * and occasionally replace a value -- but you can't insert new keys or
1893 * remove them.
1894 */
1895 int
PyDict_SetItem(PyObject * op,PyObject * key,PyObject * value)1896 PyDict_SetItem(PyObject *op, PyObject *key, PyObject *value)
1897 {
1898 if (!PyDict_Check(op)) {
1899 PyErr_BadInternalCall();
1900 return -1;
1901 }
1902 assert(key);
1903 assert(value);
1904 Py_INCREF(key);
1905 Py_INCREF(value);
1906 return _PyDict_SetItem_Take2((PyDictObject *)op, key, value);
1907 }
1908
1909 int
_PyDict_SetItem_KnownHash(PyObject * op,PyObject * key,PyObject * value,Py_hash_t hash)1910 _PyDict_SetItem_KnownHash(PyObject *op, PyObject *key, PyObject *value,
1911 Py_hash_t hash)
1912 {
1913 PyDictObject *mp;
1914
1915 if (!PyDict_Check(op)) {
1916 PyErr_BadInternalCall();
1917 return -1;
1918 }
1919 assert(key);
1920 assert(value);
1921 assert(hash != -1);
1922 mp = (PyDictObject *)op;
1923
1924 Py_INCREF(key);
1925 Py_INCREF(value);
1926 if (mp->ma_keys == Py_EMPTY_KEYS) {
1927 return insert_to_emptydict(mp, key, hash, value);
1928 }
1929 /* insertdict() handles any resizing that might be necessary */
1930 return insertdict(mp, key, hash, value);
1931 }
1932
1933 static void
delete_index_from_values(PyDictValues * values,Py_ssize_t ix)1934 delete_index_from_values(PyDictValues *values, Py_ssize_t ix)
1935 {
1936 uint8_t *size_ptr = ((uint8_t *)values)-2;
1937 int size = *size_ptr;
1938 int i;
1939 for (i = 1; size_ptr[-i] != ix; i++) {
1940 assert(i <= size);
1941 }
1942 assert(i <= size);
1943 for (; i < size; i++) {
1944 size_ptr[-i] = size_ptr[-i-1];
1945 }
1946 *size_ptr = size -1;
1947 }
1948
1949 static int
delitem_common(PyDictObject * mp,Py_hash_t hash,Py_ssize_t ix,PyObject * old_value)1950 delitem_common(PyDictObject *mp, Py_hash_t hash, Py_ssize_t ix,
1951 PyObject *old_value)
1952 {
1953 PyObject *old_key;
1954
1955 Py_ssize_t hashpos = lookdict_index(mp->ma_keys, hash, ix);
1956 assert(hashpos >= 0);
1957
1958 mp->ma_used--;
1959 mp->ma_version_tag = DICT_NEXT_VERSION();
1960 if (mp->ma_values) {
1961 assert(old_value == mp->ma_values->values[ix]);
1962 mp->ma_values->values[ix] = NULL;
1963 assert(ix < SHARED_KEYS_MAX_SIZE);
1964 /* Update order */
1965 delete_index_from_values(mp->ma_values, ix);
1966 ASSERT_CONSISTENT(mp);
1967 }
1968 else {
1969 mp->ma_keys->dk_version = 0;
1970 dictkeys_set_index(mp->ma_keys, hashpos, DKIX_DUMMY);
1971 if (DK_IS_UNICODE(mp->ma_keys)) {
1972 PyDictUnicodeEntry *ep = &DK_UNICODE_ENTRIES(mp->ma_keys)[ix];
1973 old_key = ep->me_key;
1974 ep->me_key = NULL;
1975 ep->me_value = NULL;
1976 }
1977 else {
1978 PyDictKeyEntry *ep = &DK_ENTRIES(mp->ma_keys)[ix];
1979 old_key = ep->me_key;
1980 ep->me_key = NULL;
1981 ep->me_value = NULL;
1982 ep->me_hash = 0;
1983 }
1984 Py_DECREF(old_key);
1985 }
1986 Py_DECREF(old_value);
1987
1988 ASSERT_CONSISTENT(mp);
1989 return 0;
1990 }
1991
1992 int
PyDict_DelItem(PyObject * op,PyObject * key)1993 PyDict_DelItem(PyObject *op, PyObject *key)
1994 {
1995 Py_hash_t hash;
1996 assert(key);
1997 if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
1998 hash = PyObject_Hash(key);
1999 if (hash == -1)
2000 return -1;
2001 }
2002
2003 return _PyDict_DelItem_KnownHash(op, key, hash);
2004 }
2005
2006 int
_PyDict_DelItem_KnownHash(PyObject * op,PyObject * key,Py_hash_t hash)2007 _PyDict_DelItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)
2008 {
2009 Py_ssize_t ix;
2010 PyDictObject *mp;
2011 PyObject *old_value;
2012
2013 if (!PyDict_Check(op)) {
2014 PyErr_BadInternalCall();
2015 return -1;
2016 }
2017 assert(key);
2018 assert(hash != -1);
2019 mp = (PyDictObject *)op;
2020 ix = _Py_dict_lookup(mp, key, hash, &old_value);
2021 if (ix == DKIX_ERROR)
2022 return -1;
2023 if (ix == DKIX_EMPTY || old_value == NULL) {
2024 _PyErr_SetKeyError(key);
2025 return -1;
2026 }
2027
2028 return delitem_common(mp, hash, ix, old_value);
2029 }
2030
2031 /* This function promises that the predicate -> deletion sequence is atomic
2032 * (i.e. protected by the GIL), assuming the predicate itself doesn't
2033 * release the GIL.
2034 */
2035 int
_PyDict_DelItemIf(PyObject * op,PyObject * key,int (* predicate)(PyObject * value))2036 _PyDict_DelItemIf(PyObject *op, PyObject *key,
2037 int (*predicate)(PyObject *value))
2038 {
2039 Py_ssize_t hashpos, ix;
2040 PyDictObject *mp;
2041 Py_hash_t hash;
2042 PyObject *old_value;
2043 int res;
2044
2045 if (!PyDict_Check(op)) {
2046 PyErr_BadInternalCall();
2047 return -1;
2048 }
2049 assert(key);
2050 hash = PyObject_Hash(key);
2051 if (hash == -1)
2052 return -1;
2053 mp = (PyDictObject *)op;
2054 ix = _Py_dict_lookup(mp, key, hash, &old_value);
2055 if (ix == DKIX_ERROR)
2056 return -1;
2057 if (ix == DKIX_EMPTY || old_value == NULL) {
2058 _PyErr_SetKeyError(key);
2059 return -1;
2060 }
2061
2062 res = predicate(old_value);
2063 if (res == -1)
2064 return -1;
2065
2066 hashpos = lookdict_index(mp->ma_keys, hash, ix);
2067 assert(hashpos >= 0);
2068
2069 if (res > 0)
2070 return delitem_common(mp, hashpos, ix, old_value);
2071 else
2072 return 0;
2073 }
2074
2075
2076 void
PyDict_Clear(PyObject * op)2077 PyDict_Clear(PyObject *op)
2078 {
2079 PyDictObject *mp;
2080 PyDictKeysObject *oldkeys;
2081 PyDictValues *oldvalues;
2082 Py_ssize_t i, n;
2083
2084 if (!PyDict_Check(op))
2085 return;
2086 mp = ((PyDictObject *)op);
2087 oldkeys = mp->ma_keys;
2088 oldvalues = mp->ma_values;
2089 if (oldkeys == Py_EMPTY_KEYS) {
2090 return;
2091 }
2092 /* Empty the dict... */
2093 dictkeys_incref(Py_EMPTY_KEYS);
2094 mp->ma_keys = Py_EMPTY_KEYS;
2095 mp->ma_values = NULL;
2096 mp->ma_used = 0;
2097 mp->ma_version_tag = DICT_NEXT_VERSION();
2098 /* ...then clear the keys and values */
2099 if (oldvalues != NULL) {
2100 n = oldkeys->dk_nentries;
2101 for (i = 0; i < n; i++)
2102 Py_CLEAR(oldvalues->values[i]);
2103 free_values(oldvalues);
2104 dictkeys_decref(oldkeys);
2105 }
2106 else {
2107 assert(oldkeys->dk_refcnt == 1);
2108 dictkeys_decref(oldkeys);
2109 }
2110 ASSERT_CONSISTENT(mp);
2111 }
2112
2113 /* Internal version of PyDict_Next that returns a hash value in addition
2114 * to the key and value.
2115 * Return 1 on success, return 0 when the reached the end of the dictionary
2116 * (or if op is not a dictionary)
2117 */
2118 int
_PyDict_Next(PyObject * op,Py_ssize_t * ppos,PyObject ** pkey,PyObject ** pvalue,Py_hash_t * phash)2119 _PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey,
2120 PyObject **pvalue, Py_hash_t *phash)
2121 {
2122 Py_ssize_t i;
2123 PyDictObject *mp;
2124 PyObject *key, *value;
2125 Py_hash_t hash;
2126
2127 if (!PyDict_Check(op))
2128 return 0;
2129 mp = (PyDictObject *)op;
2130 i = *ppos;
2131 if (mp->ma_values) {
2132 assert(mp->ma_used <= SHARED_KEYS_MAX_SIZE);
2133 if (i < 0 || i >= mp->ma_used)
2134 return 0;
2135 int index = get_index_from_order(mp, i);
2136 value = mp->ma_values->values[index];
2137
2138 key = DK_UNICODE_ENTRIES(mp->ma_keys)[index].me_key;
2139 hash = unicode_get_hash(key);
2140 assert(value != NULL);
2141 }
2142 else {
2143 Py_ssize_t n = mp->ma_keys->dk_nentries;
2144 if (i < 0 || i >= n)
2145 return 0;
2146 if (DK_IS_UNICODE(mp->ma_keys)) {
2147 PyDictUnicodeEntry *entry_ptr = &DK_UNICODE_ENTRIES(mp->ma_keys)[i];
2148 while (i < n && entry_ptr->me_value == NULL) {
2149 entry_ptr++;
2150 i++;
2151 }
2152 if (i >= n)
2153 return 0;
2154 key = entry_ptr->me_key;
2155 hash = unicode_get_hash(entry_ptr->me_key);
2156 value = entry_ptr->me_value;
2157 }
2158 else {
2159 PyDictKeyEntry *entry_ptr = &DK_ENTRIES(mp->ma_keys)[i];
2160 while (i < n && entry_ptr->me_value == NULL) {
2161 entry_ptr++;
2162 i++;
2163 }
2164 if (i >= n)
2165 return 0;
2166 key = entry_ptr->me_key;
2167 hash = entry_ptr->me_hash;
2168 value = entry_ptr->me_value;
2169 }
2170 }
2171 *ppos = i+1;
2172 if (pkey)
2173 *pkey = key;
2174 if (pvalue)
2175 *pvalue = value;
2176 if (phash)
2177 *phash = hash;
2178 return 1;
2179 }
2180
2181 /*
2182 * Iterate over a dict. Use like so:
2183 *
2184 * Py_ssize_t i;
2185 * PyObject *key, *value;
2186 * i = 0; # important! i should not otherwise be changed by you
2187 * while (PyDict_Next(yourdict, &i, &key, &value)) {
2188 * Refer to borrowed references in key and value.
2189 * }
2190 *
2191 * Return 1 on success, return 0 when the reached the end of the dictionary
2192 * (or if op is not a dictionary)
2193 *
2194 * CAUTION: In general, it isn't safe to use PyDict_Next in a loop that
2195 * mutates the dict. One exception: it is safe if the loop merely changes
2196 * the values associated with the keys (but doesn't insert new keys or
2197 * delete keys), via PyDict_SetItem().
2198 */
2199 int
PyDict_Next(PyObject * op,Py_ssize_t * ppos,PyObject ** pkey,PyObject ** pvalue)2200 PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue)
2201 {
2202 return _PyDict_Next(op, ppos, pkey, pvalue, NULL);
2203 }
2204
2205 /* Internal version of dict.pop(). */
2206 PyObject *
_PyDict_Pop_KnownHash(PyObject * dict,PyObject * key,Py_hash_t hash,PyObject * deflt)2207 _PyDict_Pop_KnownHash(PyObject *dict, PyObject *key, Py_hash_t hash, PyObject *deflt)
2208 {
2209 Py_ssize_t ix;
2210 PyObject *old_value;
2211 PyDictObject *mp;
2212
2213 assert(PyDict_Check(dict));
2214 mp = (PyDictObject *)dict;
2215
2216 if (mp->ma_used == 0) {
2217 if (deflt) {
2218 Py_INCREF(deflt);
2219 return deflt;
2220 }
2221 _PyErr_SetKeyError(key);
2222 return NULL;
2223 }
2224 ix = _Py_dict_lookup(mp, key, hash, &old_value);
2225 if (ix == DKIX_ERROR)
2226 return NULL;
2227 if (ix == DKIX_EMPTY || old_value == NULL) {
2228 if (deflt) {
2229 Py_INCREF(deflt);
2230 return deflt;
2231 }
2232 _PyErr_SetKeyError(key);
2233 return NULL;
2234 }
2235 assert(old_value != NULL);
2236 Py_INCREF(old_value);
2237 delitem_common(mp, hash, ix, old_value);
2238
2239 ASSERT_CONSISTENT(mp);
2240 return old_value;
2241 }
2242
2243 PyObject *
_PyDict_Pop(PyObject * dict,PyObject * key,PyObject * deflt)2244 _PyDict_Pop(PyObject *dict, PyObject *key, PyObject *deflt)
2245 {
2246 Py_hash_t hash;
2247
2248 if (((PyDictObject *)dict)->ma_used == 0) {
2249 if (deflt) {
2250 Py_INCREF(deflt);
2251 return deflt;
2252 }
2253 _PyErr_SetKeyError(key);
2254 return NULL;
2255 }
2256 if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
2257 hash = PyObject_Hash(key);
2258 if (hash == -1)
2259 return NULL;
2260 }
2261 return _PyDict_Pop_KnownHash(dict, key, hash, deflt);
2262 }
2263
2264 /* Internal version of dict.from_keys(). It is subclass-friendly. */
2265 PyObject *
_PyDict_FromKeys(PyObject * cls,PyObject * iterable,PyObject * value)2266 _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)
2267 {
2268 PyObject *it; /* iter(iterable) */
2269 PyObject *key;
2270 PyObject *d;
2271 int status;
2272
2273 d = _PyObject_CallNoArgs(cls);
2274 if (d == NULL)
2275 return NULL;
2276
2277 if (PyDict_CheckExact(d) && ((PyDictObject *)d)->ma_used == 0) {
2278 if (PyDict_CheckExact(iterable)) {
2279 PyDictObject *mp = (PyDictObject *)d;
2280 PyObject *oldvalue;
2281 Py_ssize_t pos = 0;
2282 PyObject *key;
2283 Py_hash_t hash;
2284
2285 int unicode = DK_IS_UNICODE(((PyDictObject*)iterable)->ma_keys);
2286 if (dictresize(mp, estimate_log2_keysize(PyDict_GET_SIZE(iterable)), unicode)) {
2287 Py_DECREF(d);
2288 return NULL;
2289 }
2290
2291 while (_PyDict_Next(iterable, &pos, &key, &oldvalue, &hash)) {
2292 Py_INCREF(key);
2293 Py_INCREF(value);
2294 if (insertdict(mp, key, hash, value)) {
2295 Py_DECREF(d);
2296 return NULL;
2297 }
2298 }
2299 return d;
2300 }
2301 if (PyAnySet_CheckExact(iterable)) {
2302 PyDictObject *mp = (PyDictObject *)d;
2303 Py_ssize_t pos = 0;
2304 PyObject *key;
2305 Py_hash_t hash;
2306
2307 if (dictresize(mp, estimate_log2_keysize(PySet_GET_SIZE(iterable)), 0)) {
2308 Py_DECREF(d);
2309 return NULL;
2310 }
2311
2312 while (_PySet_NextEntry(iterable, &pos, &key, &hash)) {
2313 Py_INCREF(key);
2314 Py_INCREF(value);
2315 if (insertdict(mp, key, hash, value)) {
2316 Py_DECREF(d);
2317 return NULL;
2318 }
2319 }
2320 return d;
2321 }
2322 }
2323
2324 it = PyObject_GetIter(iterable);
2325 if (it == NULL){
2326 Py_DECREF(d);
2327 return NULL;
2328 }
2329
2330 if (PyDict_CheckExact(d)) {
2331 while ((key = PyIter_Next(it)) != NULL) {
2332 status = PyDict_SetItem(d, key, value);
2333 Py_DECREF(key);
2334 if (status < 0)
2335 goto Fail;
2336 }
2337 } else {
2338 while ((key = PyIter_Next(it)) != NULL) {
2339 status = PyObject_SetItem(d, key, value);
2340 Py_DECREF(key);
2341 if (status < 0)
2342 goto Fail;
2343 }
2344 }
2345
2346 if (PyErr_Occurred())
2347 goto Fail;
2348 Py_DECREF(it);
2349 return d;
2350
2351 Fail:
2352 Py_DECREF(it);
2353 Py_DECREF(d);
2354 return NULL;
2355 }
2356
2357 /* Methods */
2358
2359 static void
dict_dealloc(PyDictObject * mp)2360 dict_dealloc(PyDictObject *mp)
2361 {
2362 PyDictValues *values = mp->ma_values;
2363 PyDictKeysObject *keys = mp->ma_keys;
2364 Py_ssize_t i, n;
2365
2366 /* bpo-31095: UnTrack is needed before calling any callbacks */
2367 PyObject_GC_UnTrack(mp);
2368 Py_TRASHCAN_BEGIN(mp, dict_dealloc)
2369 if (values != NULL) {
2370 for (i = 0, n = mp->ma_keys->dk_nentries; i < n; i++) {
2371 Py_XDECREF(values->values[i]);
2372 }
2373 free_values(values);
2374 dictkeys_decref(keys);
2375 }
2376 else if (keys != NULL) {
2377 assert(keys->dk_refcnt == 1 || keys == Py_EMPTY_KEYS);
2378 dictkeys_decref(keys);
2379 }
2380 #if PyDict_MAXFREELIST > 0
2381 struct _Py_dict_state *state = get_dict_state();
2382 #ifdef Py_DEBUG
2383 // new_dict() must not be called after _PyDict_Fini()
2384 assert(state->numfree != -1);
2385 #endif
2386 if (state->numfree < PyDict_MAXFREELIST && Py_IS_TYPE(mp, &PyDict_Type)) {
2387 state->free_list[state->numfree++] = mp;
2388 OBJECT_STAT_INC(to_freelist);
2389 }
2390 else
2391 #endif
2392 {
2393 Py_TYPE(mp)->tp_free((PyObject *)mp);
2394 }
2395 Py_TRASHCAN_END
2396 }
2397
2398
2399 static PyObject *
dict_repr(PyDictObject * mp)2400 dict_repr(PyDictObject *mp)
2401 {
2402 Py_ssize_t i;
2403 PyObject *key = NULL, *value = NULL;
2404 _PyUnicodeWriter writer;
2405 int first;
2406
2407 i = Py_ReprEnter((PyObject *)mp);
2408 if (i != 0) {
2409 return i > 0 ? PyUnicode_FromString("{...}") : NULL;
2410 }
2411
2412 if (mp->ma_used == 0) {
2413 Py_ReprLeave((PyObject *)mp);
2414 return PyUnicode_FromString("{}");
2415 }
2416
2417 _PyUnicodeWriter_Init(&writer);
2418 writer.overallocate = 1;
2419 /* "{" + "1: 2" + ", 3: 4" * (len - 1) + "}" */
2420 writer.min_length = 1 + 4 + (2 + 4) * (mp->ma_used - 1) + 1;
2421
2422 if (_PyUnicodeWriter_WriteChar(&writer, '{') < 0)
2423 goto error;
2424
2425 /* Do repr() on each key+value pair, and insert ": " between them.
2426 Note that repr may mutate the dict. */
2427 i = 0;
2428 first = 1;
2429 while (PyDict_Next((PyObject *)mp, &i, &key, &value)) {
2430 PyObject *s;
2431 int res;
2432
2433 /* Prevent repr from deleting key or value during key format. */
2434 Py_INCREF(key);
2435 Py_INCREF(value);
2436
2437 if (!first) {
2438 if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0)
2439 goto error;
2440 }
2441 first = 0;
2442
2443 s = PyObject_Repr(key);
2444 if (s == NULL)
2445 goto error;
2446 res = _PyUnicodeWriter_WriteStr(&writer, s);
2447 Py_DECREF(s);
2448 if (res < 0)
2449 goto error;
2450
2451 if (_PyUnicodeWriter_WriteASCIIString(&writer, ": ", 2) < 0)
2452 goto error;
2453
2454 s = PyObject_Repr(value);
2455 if (s == NULL)
2456 goto error;
2457 res = _PyUnicodeWriter_WriteStr(&writer, s);
2458 Py_DECREF(s);
2459 if (res < 0)
2460 goto error;
2461
2462 Py_CLEAR(key);
2463 Py_CLEAR(value);
2464 }
2465
2466 writer.overallocate = 0;
2467 if (_PyUnicodeWriter_WriteChar(&writer, '}') < 0)
2468 goto error;
2469
2470 Py_ReprLeave((PyObject *)mp);
2471
2472 return _PyUnicodeWriter_Finish(&writer);
2473
2474 error:
2475 Py_ReprLeave((PyObject *)mp);
2476 _PyUnicodeWriter_Dealloc(&writer);
2477 Py_XDECREF(key);
2478 Py_XDECREF(value);
2479 return NULL;
2480 }
2481
2482 static Py_ssize_t
dict_length(PyDictObject * mp)2483 dict_length(PyDictObject *mp)
2484 {
2485 return mp->ma_used;
2486 }
2487
2488 static PyObject *
dict_subscript(PyDictObject * mp,PyObject * key)2489 dict_subscript(PyDictObject *mp, PyObject *key)
2490 {
2491 Py_ssize_t ix;
2492 Py_hash_t hash;
2493 PyObject *value;
2494
2495 if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
2496 hash = PyObject_Hash(key);
2497 if (hash == -1)
2498 return NULL;
2499 }
2500 ix = _Py_dict_lookup(mp, key, hash, &value);
2501 if (ix == DKIX_ERROR)
2502 return NULL;
2503 if (ix == DKIX_EMPTY || value == NULL) {
2504 if (!PyDict_CheckExact(mp)) {
2505 /* Look up __missing__ method if we're a subclass. */
2506 PyObject *missing, *res;
2507 missing = _PyObject_LookupSpecial(
2508 (PyObject *)mp, &_Py_ID(__missing__));
2509 if (missing != NULL) {
2510 res = PyObject_CallOneArg(missing, key);
2511 Py_DECREF(missing);
2512 return res;
2513 }
2514 else if (PyErr_Occurred())
2515 return NULL;
2516 }
2517 _PyErr_SetKeyError(key);
2518 return NULL;
2519 }
2520 Py_INCREF(value);
2521 return value;
2522 }
2523
2524 static int
dict_ass_sub(PyDictObject * mp,PyObject * v,PyObject * w)2525 dict_ass_sub(PyDictObject *mp, PyObject *v, PyObject *w)
2526 {
2527 if (w == NULL)
2528 return PyDict_DelItem((PyObject *)mp, v);
2529 else
2530 return PyDict_SetItem((PyObject *)mp, v, w);
2531 }
2532
2533 static PyMappingMethods dict_as_mapping = {
2534 (lenfunc)dict_length, /*mp_length*/
2535 (binaryfunc)dict_subscript, /*mp_subscript*/
2536 (objobjargproc)dict_ass_sub, /*mp_ass_subscript*/
2537 };
2538
2539 static PyObject *
dict_keys(PyDictObject * mp)2540 dict_keys(PyDictObject *mp)
2541 {
2542 PyObject *v;
2543 Py_ssize_t n;
2544
2545 again:
2546 n = mp->ma_used;
2547 v = PyList_New(n);
2548 if (v == NULL)
2549 return NULL;
2550 if (n != mp->ma_used) {
2551 /* Durnit. The allocations caused the dict to resize.
2552 * Just start over, this shouldn't normally happen.
2553 */
2554 Py_DECREF(v);
2555 goto again;
2556 }
2557
2558 /* Nothing we do below makes any function calls. */
2559 Py_ssize_t j = 0, pos = 0;
2560 PyObject *key;
2561 while (_PyDict_Next((PyObject*)mp, &pos, &key, NULL, NULL)) {
2562 assert(j < n);
2563 Py_INCREF(key);
2564 PyList_SET_ITEM(v, j, key);
2565 j++;
2566 }
2567 assert(j == n);
2568 return v;
2569 }
2570
2571 static PyObject *
dict_values(PyDictObject * mp)2572 dict_values(PyDictObject *mp)
2573 {
2574 PyObject *v;
2575 Py_ssize_t n;
2576
2577 again:
2578 n = mp->ma_used;
2579 v = PyList_New(n);
2580 if (v == NULL)
2581 return NULL;
2582 if (n != mp->ma_used) {
2583 /* Durnit. The allocations caused the dict to resize.
2584 * Just start over, this shouldn't normally happen.
2585 */
2586 Py_DECREF(v);
2587 goto again;
2588 }
2589
2590 /* Nothing we do below makes any function calls. */
2591 Py_ssize_t j = 0, pos = 0;
2592 PyObject *value;
2593 while (_PyDict_Next((PyObject*)mp, &pos, NULL, &value, NULL)) {
2594 assert(j < n);
2595 Py_INCREF(value);
2596 PyList_SET_ITEM(v, j, value);
2597 j++;
2598 }
2599 assert(j == n);
2600 return v;
2601 }
2602
2603 static PyObject *
dict_items(PyDictObject * mp)2604 dict_items(PyDictObject *mp)
2605 {
2606 PyObject *v;
2607 Py_ssize_t i, n;
2608 PyObject *item;
2609
2610 /* Preallocate the list of tuples, to avoid allocations during
2611 * the loop over the items, which could trigger GC, which
2612 * could resize the dict. :-(
2613 */
2614 again:
2615 n = mp->ma_used;
2616 v = PyList_New(n);
2617 if (v == NULL)
2618 return NULL;
2619 for (i = 0; i < n; i++) {
2620 item = PyTuple_New(2);
2621 if (item == NULL) {
2622 Py_DECREF(v);
2623 return NULL;
2624 }
2625 PyList_SET_ITEM(v, i, item);
2626 }
2627 if (n != mp->ma_used) {
2628 /* Durnit. The allocations caused the dict to resize.
2629 * Just start over, this shouldn't normally happen.
2630 */
2631 Py_DECREF(v);
2632 goto again;
2633 }
2634
2635 /* Nothing we do below makes any function calls. */
2636 Py_ssize_t j = 0, pos = 0;
2637 PyObject *key, *value;
2638 while (_PyDict_Next((PyObject*)mp, &pos, &key, &value, NULL)) {
2639 assert(j < n);
2640 PyObject *item = PyList_GET_ITEM(v, j);
2641 Py_INCREF(key);
2642 PyTuple_SET_ITEM(item, 0, key);
2643 Py_INCREF(value);
2644 PyTuple_SET_ITEM(item, 1, value);
2645 j++;
2646 }
2647 assert(j == n);
2648 return v;
2649 }
2650
2651 /*[clinic input]
2652 @classmethod
2653 dict.fromkeys
2654 iterable: object
2655 value: object=None
2656 /
2657
2658 Create a new dictionary with keys from iterable and values set to value.
2659 [clinic start generated code]*/
2660
2661 static PyObject *
dict_fromkeys_impl(PyTypeObject * type,PyObject * iterable,PyObject * value)2662 dict_fromkeys_impl(PyTypeObject *type, PyObject *iterable, PyObject *value)
2663 /*[clinic end generated code: output=8fb98e4b10384999 input=382ba4855d0f74c3]*/
2664 {
2665 return _PyDict_FromKeys((PyObject *)type, iterable, value);
2666 }
2667
2668 /* Single-arg dict update; used by dict_update_common and operators. */
2669 static int
dict_update_arg(PyObject * self,PyObject * arg)2670 dict_update_arg(PyObject *self, PyObject *arg)
2671 {
2672 if (PyDict_CheckExact(arg)) {
2673 return PyDict_Merge(self, arg, 1);
2674 }
2675 PyObject *func;
2676 if (_PyObject_LookupAttr(arg, &_Py_ID(keys), &func) < 0) {
2677 return -1;
2678 }
2679 if (func != NULL) {
2680 Py_DECREF(func);
2681 return PyDict_Merge(self, arg, 1);
2682 }
2683 return PyDict_MergeFromSeq2(self, arg, 1);
2684 }
2685
2686 static int
dict_update_common(PyObject * self,PyObject * args,PyObject * kwds,const char * methname)2687 dict_update_common(PyObject *self, PyObject *args, PyObject *kwds,
2688 const char *methname)
2689 {
2690 PyObject *arg = NULL;
2691 int result = 0;
2692
2693 if (!PyArg_UnpackTuple(args, methname, 0, 1, &arg)) {
2694 result = -1;
2695 }
2696 else if (arg != NULL) {
2697 result = dict_update_arg(self, arg);
2698 }
2699
2700 if (result == 0 && kwds != NULL) {
2701 if (PyArg_ValidateKeywordArguments(kwds))
2702 result = PyDict_Merge(self, kwds, 1);
2703 else
2704 result = -1;
2705 }
2706 return result;
2707 }
2708
2709 /* Note: dict.update() uses the METH_VARARGS|METH_KEYWORDS calling convention.
2710 Using METH_FASTCALL|METH_KEYWORDS would make dict.update(**dict2) calls
2711 slower, see the issue #29312. */
2712 static PyObject *
dict_update(PyObject * self,PyObject * args,PyObject * kwds)2713 dict_update(PyObject *self, PyObject *args, PyObject *kwds)
2714 {
2715 if (dict_update_common(self, args, kwds, "update") != -1)
2716 Py_RETURN_NONE;
2717 return NULL;
2718 }
2719
2720 /* Update unconditionally replaces existing items.
2721 Merge has a 3rd argument 'override'; if set, it acts like Update,
2722 otherwise it leaves existing items unchanged.
2723
2724 PyDict_{Update,Merge} update/merge from a mapping object.
2725
2726 PyDict_MergeFromSeq2 updates/merges from any iterable object
2727 producing iterable objects of length 2.
2728 */
2729
2730 int
PyDict_MergeFromSeq2(PyObject * d,PyObject * seq2,int override)2731 PyDict_MergeFromSeq2(PyObject *d, PyObject *seq2, int override)
2732 {
2733 PyObject *it; /* iter(seq2) */
2734 Py_ssize_t i; /* index into seq2 of current element */
2735 PyObject *item; /* seq2[i] */
2736 PyObject *fast; /* item as a 2-tuple or 2-list */
2737
2738 assert(d != NULL);
2739 assert(PyDict_Check(d));
2740 assert(seq2 != NULL);
2741
2742 it = PyObject_GetIter(seq2);
2743 if (it == NULL)
2744 return -1;
2745
2746 for (i = 0; ; ++i) {
2747 PyObject *key, *value;
2748 Py_ssize_t n;
2749
2750 fast = NULL;
2751 item = PyIter_Next(it);
2752 if (item == NULL) {
2753 if (PyErr_Occurred())
2754 goto Fail;
2755 break;
2756 }
2757
2758 /* Convert item to sequence, and verify length 2. */
2759 fast = PySequence_Fast(item, "");
2760 if (fast == NULL) {
2761 if (PyErr_ExceptionMatches(PyExc_TypeError))
2762 PyErr_Format(PyExc_TypeError,
2763 "cannot convert dictionary update "
2764 "sequence element #%zd to a sequence",
2765 i);
2766 goto Fail;
2767 }
2768 n = PySequence_Fast_GET_SIZE(fast);
2769 if (n != 2) {
2770 PyErr_Format(PyExc_ValueError,
2771 "dictionary update sequence element #%zd "
2772 "has length %zd; 2 is required",
2773 i, n);
2774 goto Fail;
2775 }
2776
2777 /* Update/merge with this (key, value) pair. */
2778 key = PySequence_Fast_GET_ITEM(fast, 0);
2779 value = PySequence_Fast_GET_ITEM(fast, 1);
2780 Py_INCREF(key);
2781 Py_INCREF(value);
2782 if (override) {
2783 if (PyDict_SetItem(d, key, value) < 0) {
2784 Py_DECREF(key);
2785 Py_DECREF(value);
2786 goto Fail;
2787 }
2788 }
2789 else {
2790 if (PyDict_SetDefault(d, key, value) == NULL) {
2791 Py_DECREF(key);
2792 Py_DECREF(value);
2793 goto Fail;
2794 }
2795 }
2796
2797 Py_DECREF(key);
2798 Py_DECREF(value);
2799 Py_DECREF(fast);
2800 Py_DECREF(item);
2801 }
2802
2803 i = 0;
2804 ASSERT_CONSISTENT(d);
2805 goto Return;
2806 Fail:
2807 Py_XDECREF(item);
2808 Py_XDECREF(fast);
2809 i = -1;
2810 Return:
2811 Py_DECREF(it);
2812 return Py_SAFE_DOWNCAST(i, Py_ssize_t, int);
2813 }
2814
2815 static int
dict_merge(PyObject * a,PyObject * b,int override)2816 dict_merge(PyObject *a, PyObject *b, int override)
2817 {
2818 PyDictObject *mp, *other;
2819
2820 assert(0 <= override && override <= 2);
2821
2822 /* We accept for the argument either a concrete dictionary object,
2823 * or an abstract "mapping" object. For the former, we can do
2824 * things quite efficiently. For the latter, we only require that
2825 * PyMapping_Keys() and PyObject_GetItem() be supported.
2826 */
2827 if (a == NULL || !PyDict_Check(a) || b == NULL) {
2828 PyErr_BadInternalCall();
2829 return -1;
2830 }
2831 mp = (PyDictObject*)a;
2832 if (PyDict_Check(b) && (Py_TYPE(b)->tp_iter == (getiterfunc)dict_iter)) {
2833 other = (PyDictObject*)b;
2834 if (other == mp || other->ma_used == 0)
2835 /* a.update(a) or a.update({}); nothing to do */
2836 return 0;
2837 if (mp->ma_used == 0) {
2838 /* Since the target dict is empty, PyDict_GetItem()
2839 * always returns NULL. Setting override to 1
2840 * skips the unnecessary test.
2841 */
2842 override = 1;
2843 PyDictKeysObject *okeys = other->ma_keys;
2844
2845 // If other is clean, combined, and just allocated, just clone it.
2846 if (other->ma_values == NULL &&
2847 other->ma_used == okeys->dk_nentries &&
2848 (DK_LOG_SIZE(okeys) == PyDict_LOG_MINSIZE ||
2849 USABLE_FRACTION(DK_SIZE(okeys)/2) < other->ma_used)) {
2850 PyDictKeysObject *keys = clone_combined_dict_keys(other);
2851 if (keys == NULL) {
2852 return -1;
2853 }
2854
2855 dictkeys_decref(mp->ma_keys);
2856 mp->ma_keys = keys;
2857 if (mp->ma_values != NULL) {
2858 free_values(mp->ma_values);
2859 mp->ma_values = NULL;
2860 }
2861
2862 mp->ma_used = other->ma_used;
2863 mp->ma_version_tag = DICT_NEXT_VERSION();
2864 ASSERT_CONSISTENT(mp);
2865
2866 if (_PyObject_GC_IS_TRACKED(other) && !_PyObject_GC_IS_TRACKED(mp)) {
2867 /* Maintain tracking. */
2868 _PyObject_GC_TRACK(mp);
2869 }
2870
2871 return 0;
2872 }
2873 }
2874 /* Do one big resize at the start, rather than
2875 * incrementally resizing as we insert new items. Expect
2876 * that there will be no (or few) overlapping keys.
2877 */
2878 if (USABLE_FRACTION(DK_SIZE(mp->ma_keys)) < other->ma_used) {
2879 int unicode = DK_IS_UNICODE(other->ma_keys);
2880 if (dictresize(mp, estimate_log2_keysize(mp->ma_used + other->ma_used), unicode)) {
2881 return -1;
2882 }
2883 }
2884
2885 Py_ssize_t orig_size = other->ma_keys->dk_nentries;
2886 Py_ssize_t pos = 0;
2887 Py_hash_t hash;
2888 PyObject *key, *value;
2889
2890 while (_PyDict_Next((PyObject*)other, &pos, &key, &value, &hash)) {
2891 int err = 0;
2892 Py_INCREF(key);
2893 Py_INCREF(value);
2894 if (override == 1) {
2895 Py_INCREF(key);
2896 Py_INCREF(value);
2897 err = insertdict(mp, key, hash, value);
2898 }
2899 else {
2900 err = _PyDict_Contains_KnownHash(a, key, hash);
2901 if (err == 0) {
2902 Py_INCREF(key);
2903 Py_INCREF(value);
2904 err = insertdict(mp, key, hash, value);
2905 }
2906 else if (err > 0) {
2907 if (override != 0) {
2908 _PyErr_SetKeyError(key);
2909 Py_DECREF(value);
2910 Py_DECREF(key);
2911 return -1;
2912 }
2913 err = 0;
2914 }
2915 }
2916 Py_DECREF(value);
2917 Py_DECREF(key);
2918 if (err != 0)
2919 return -1;
2920
2921 if (orig_size != other->ma_keys->dk_nentries) {
2922 PyErr_SetString(PyExc_RuntimeError,
2923 "dict mutated during update");
2924 return -1;
2925 }
2926 }
2927 }
2928 else {
2929 /* Do it the generic, slower way */
2930 PyObject *keys = PyMapping_Keys(b);
2931 PyObject *iter;
2932 PyObject *key, *value;
2933 int status;
2934
2935 if (keys == NULL)
2936 /* Docstring says this is equivalent to E.keys() so
2937 * if E doesn't have a .keys() method we want
2938 * AttributeError to percolate up. Might as well
2939 * do the same for any other error.
2940 */
2941 return -1;
2942
2943 iter = PyObject_GetIter(keys);
2944 Py_DECREF(keys);
2945 if (iter == NULL)
2946 return -1;
2947
2948 for (key = PyIter_Next(iter); key; key = PyIter_Next(iter)) {
2949 if (override != 1) {
2950 status = PyDict_Contains(a, key);
2951 if (status != 0) {
2952 if (status > 0) {
2953 if (override == 0) {
2954 Py_DECREF(key);
2955 continue;
2956 }
2957 _PyErr_SetKeyError(key);
2958 }
2959 Py_DECREF(key);
2960 Py_DECREF(iter);
2961 return -1;
2962 }
2963 }
2964 value = PyObject_GetItem(b, key);
2965 if (value == NULL) {
2966 Py_DECREF(iter);
2967 Py_DECREF(key);
2968 return -1;
2969 }
2970 status = PyDict_SetItem(a, key, value);
2971 Py_DECREF(key);
2972 Py_DECREF(value);
2973 if (status < 0) {
2974 Py_DECREF(iter);
2975 return -1;
2976 }
2977 }
2978 Py_DECREF(iter);
2979 if (PyErr_Occurred())
2980 /* Iterator completed, via error */
2981 return -1;
2982 }
2983 ASSERT_CONSISTENT(a);
2984 return 0;
2985 }
2986
2987 int
PyDict_Update(PyObject * a,PyObject * b)2988 PyDict_Update(PyObject *a, PyObject *b)
2989 {
2990 return dict_merge(a, b, 1);
2991 }
2992
2993 int
PyDict_Merge(PyObject * a,PyObject * b,int override)2994 PyDict_Merge(PyObject *a, PyObject *b, int override)
2995 {
2996 /* XXX Deprecate override not in (0, 1). */
2997 return dict_merge(a, b, override != 0);
2998 }
2999
3000 int
_PyDict_MergeEx(PyObject * a,PyObject * b,int override)3001 _PyDict_MergeEx(PyObject *a, PyObject *b, int override)
3002 {
3003 return dict_merge(a, b, override);
3004 }
3005
3006 static PyObject *
dict_copy(PyDictObject * mp,PyObject * Py_UNUSED (ignored))3007 dict_copy(PyDictObject *mp, PyObject *Py_UNUSED(ignored))
3008 {
3009 return PyDict_Copy((PyObject*)mp);
3010 }
3011
3012 PyObject *
PyDict_Copy(PyObject * o)3013 PyDict_Copy(PyObject *o)
3014 {
3015 PyObject *copy;
3016 PyDictObject *mp;
3017 Py_ssize_t i, n;
3018
3019 if (o == NULL || !PyDict_Check(o)) {
3020 PyErr_BadInternalCall();
3021 return NULL;
3022 }
3023
3024 mp = (PyDictObject *)o;
3025 if (mp->ma_used == 0) {
3026 /* The dict is empty; just return a new dict. */
3027 return PyDict_New();
3028 }
3029
3030 if (_PyDict_HasSplitTable(mp)) {
3031 PyDictObject *split_copy;
3032 Py_ssize_t size = shared_keys_usable_size(mp->ma_keys);
3033 PyDictValues *newvalues;
3034 newvalues = new_values(size);
3035 if (newvalues == NULL)
3036 return PyErr_NoMemory();
3037 split_copy = PyObject_GC_New(PyDictObject, &PyDict_Type);
3038 if (split_copy == NULL) {
3039 free_values(newvalues);
3040 return NULL;
3041 }
3042 size_t prefix_size = ((uint8_t *)newvalues)[-1];
3043 memcpy(((char *)newvalues)-prefix_size, ((char *)mp->ma_values)-prefix_size, prefix_size-1);
3044 split_copy->ma_values = newvalues;
3045 split_copy->ma_keys = mp->ma_keys;
3046 split_copy->ma_used = mp->ma_used;
3047 split_copy->ma_version_tag = DICT_NEXT_VERSION();
3048 dictkeys_incref(mp->ma_keys);
3049 for (i = 0, n = size; i < n; i++) {
3050 PyObject *value = mp->ma_values->values[i];
3051 Py_XINCREF(value);
3052 split_copy->ma_values->values[i] = value;
3053 }
3054 if (_PyObject_GC_IS_TRACKED(mp))
3055 _PyObject_GC_TRACK(split_copy);
3056 return (PyObject *)split_copy;
3057 }
3058
3059 if (Py_TYPE(mp)->tp_iter == (getiterfunc)dict_iter &&
3060 mp->ma_values == NULL &&
3061 (mp->ma_used >= (mp->ma_keys->dk_nentries * 2) / 3))
3062 {
3063 /* Use fast-copy if:
3064
3065 (1) type(mp) doesn't override tp_iter; and
3066
3067 (2) 'mp' is not a split-dict; and
3068
3069 (3) if 'mp' is non-compact ('del' operation does not resize dicts),
3070 do fast-copy only if it has at most 1/3 non-used keys.
3071
3072 The last condition (3) is important to guard against a pathological
3073 case when a large dict is almost emptied with multiple del/pop
3074 operations and copied after that. In cases like this, we defer to
3075 PyDict_Merge, which produces a compacted copy.
3076 */
3077 PyDictKeysObject *keys = clone_combined_dict_keys(mp);
3078 if (keys == NULL) {
3079 return NULL;
3080 }
3081 PyDictObject *new = (PyDictObject *)new_dict(keys, NULL, 0, 0);
3082 if (new == NULL) {
3083 /* In case of an error, `new_dict()` takes care of
3084 cleaning up `keys`. */
3085 return NULL;
3086 }
3087
3088 new->ma_used = mp->ma_used;
3089 ASSERT_CONSISTENT(new);
3090 if (_PyObject_GC_IS_TRACKED(mp)) {
3091 /* Maintain tracking. */
3092 _PyObject_GC_TRACK(new);
3093 }
3094
3095 return (PyObject *)new;
3096 }
3097
3098 copy = PyDict_New();
3099 if (copy == NULL)
3100 return NULL;
3101 if (dict_merge(copy, o, 1) == 0)
3102 return copy;
3103 Py_DECREF(copy);
3104 return NULL;
3105 }
3106
3107 Py_ssize_t
PyDict_Size(PyObject * mp)3108 PyDict_Size(PyObject *mp)
3109 {
3110 if (mp == NULL || !PyDict_Check(mp)) {
3111 PyErr_BadInternalCall();
3112 return -1;
3113 }
3114 return ((PyDictObject *)mp)->ma_used;
3115 }
3116
3117 PyObject *
PyDict_Keys(PyObject * mp)3118 PyDict_Keys(PyObject *mp)
3119 {
3120 if (mp == NULL || !PyDict_Check(mp)) {
3121 PyErr_BadInternalCall();
3122 return NULL;
3123 }
3124 return dict_keys((PyDictObject *)mp);
3125 }
3126
3127 PyObject *
PyDict_Values(PyObject * mp)3128 PyDict_Values(PyObject *mp)
3129 {
3130 if (mp == NULL || !PyDict_Check(mp)) {
3131 PyErr_BadInternalCall();
3132 return NULL;
3133 }
3134 return dict_values((PyDictObject *)mp);
3135 }
3136
3137 PyObject *
PyDict_Items(PyObject * mp)3138 PyDict_Items(PyObject *mp)
3139 {
3140 if (mp == NULL || !PyDict_Check(mp)) {
3141 PyErr_BadInternalCall();
3142 return NULL;
3143 }
3144 return dict_items((PyDictObject *)mp);
3145 }
3146
3147 /* Return 1 if dicts equal, 0 if not, -1 if error.
3148 * Gets out as soon as any difference is detected.
3149 * Uses only Py_EQ comparison.
3150 */
3151 static int
dict_equal(PyDictObject * a,PyDictObject * b)3152 dict_equal(PyDictObject *a, PyDictObject *b)
3153 {
3154 Py_ssize_t i;
3155
3156 if (a->ma_used != b->ma_used)
3157 /* can't be equal if # of entries differ */
3158 return 0;
3159 /* Same # of entries -- check all of 'em. Exit early on any diff. */
3160 for (i = 0; i < a->ma_keys->dk_nentries; i++) {
3161 PyObject *key, *aval;
3162 Py_hash_t hash;
3163 if (DK_IS_UNICODE(a->ma_keys)) {
3164 PyDictUnicodeEntry *ep = &DK_UNICODE_ENTRIES(a->ma_keys)[i];
3165 key = ep->me_key;
3166 if (key == NULL) {
3167 continue;
3168 }
3169 hash = unicode_get_hash(key);
3170 if (a->ma_values)
3171 aval = a->ma_values->values[i];
3172 else
3173 aval = ep->me_value;
3174 }
3175 else {
3176 PyDictKeyEntry *ep = &DK_ENTRIES(a->ma_keys)[i];
3177 key = ep->me_key;
3178 aval = ep->me_value;
3179 hash = ep->me_hash;
3180 }
3181 if (aval != NULL) {
3182 int cmp;
3183 PyObject *bval;
3184 /* temporarily bump aval's refcount to ensure it stays
3185 alive until we're done with it */
3186 Py_INCREF(aval);
3187 /* ditto for key */
3188 Py_INCREF(key);
3189 /* reuse the known hash value */
3190 _Py_dict_lookup(b, key, hash, &bval);
3191 if (bval == NULL) {
3192 Py_DECREF(key);
3193 Py_DECREF(aval);
3194 if (PyErr_Occurred())
3195 return -1;
3196 return 0;
3197 }
3198 Py_INCREF(bval);
3199 cmp = PyObject_RichCompareBool(aval, bval, Py_EQ);
3200 Py_DECREF(key);
3201 Py_DECREF(aval);
3202 Py_DECREF(bval);
3203 if (cmp <= 0) /* error or not equal */
3204 return cmp;
3205 }
3206 }
3207 return 1;
3208 }
3209
3210 static PyObject *
dict_richcompare(PyObject * v,PyObject * w,int op)3211 dict_richcompare(PyObject *v, PyObject *w, int op)
3212 {
3213 int cmp;
3214 PyObject *res;
3215
3216 if (!PyDict_Check(v) || !PyDict_Check(w)) {
3217 res = Py_NotImplemented;
3218 }
3219 else if (op == Py_EQ || op == Py_NE) {
3220 cmp = dict_equal((PyDictObject *)v, (PyDictObject *)w);
3221 if (cmp < 0)
3222 return NULL;
3223 res = (cmp == (op == Py_EQ)) ? Py_True : Py_False;
3224 }
3225 else
3226 res = Py_NotImplemented;
3227 Py_INCREF(res);
3228 return res;
3229 }
3230
3231 /*[clinic input]
3232
3233 @coexist
3234 dict.__contains__
3235
3236 key: object
3237 /
3238
3239 True if the dictionary has the specified key, else False.
3240 [clinic start generated code]*/
3241
3242 static PyObject *
dict___contains__(PyDictObject * self,PyObject * key)3243 dict___contains__(PyDictObject *self, PyObject *key)
3244 /*[clinic end generated code: output=a3d03db709ed6e6b input=fe1cb42ad831e820]*/
3245 {
3246 register PyDictObject *mp = self;
3247 Py_hash_t hash;
3248 Py_ssize_t ix;
3249 PyObject *value;
3250
3251 if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
3252 hash = PyObject_Hash(key);
3253 if (hash == -1)
3254 return NULL;
3255 }
3256 ix = _Py_dict_lookup(mp, key, hash, &value);
3257 if (ix == DKIX_ERROR)
3258 return NULL;
3259 if (ix == DKIX_EMPTY || value == NULL)
3260 Py_RETURN_FALSE;
3261 Py_RETURN_TRUE;
3262 }
3263
3264 /*[clinic input]
3265 dict.get
3266
3267 key: object
3268 default: object = None
3269 /
3270
3271 Return the value for key if key is in the dictionary, else default.
3272 [clinic start generated code]*/
3273
3274 static PyObject *
dict_get_impl(PyDictObject * self,PyObject * key,PyObject * default_value)3275 dict_get_impl(PyDictObject *self, PyObject *key, PyObject *default_value)
3276 /*[clinic end generated code: output=bba707729dee05bf input=279ddb5790b6b107]*/
3277 {
3278 PyObject *val = NULL;
3279 Py_hash_t hash;
3280 Py_ssize_t ix;
3281
3282 if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
3283 hash = PyObject_Hash(key);
3284 if (hash == -1)
3285 return NULL;
3286 }
3287 ix = _Py_dict_lookup(self, key, hash, &val);
3288 if (ix == DKIX_ERROR)
3289 return NULL;
3290 if (ix == DKIX_EMPTY || val == NULL) {
3291 val = default_value;
3292 }
3293 Py_INCREF(val);
3294 return val;
3295 }
3296
3297 PyObject *
PyDict_SetDefault(PyObject * d,PyObject * key,PyObject * defaultobj)3298 PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj)
3299 {
3300 PyDictObject *mp = (PyDictObject *)d;
3301 PyObject *value;
3302 Py_hash_t hash;
3303
3304 if (!PyDict_Check(d)) {
3305 PyErr_BadInternalCall();
3306 return NULL;
3307 }
3308
3309 if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
3310 hash = PyObject_Hash(key);
3311 if (hash == -1)
3312 return NULL;
3313 }
3314
3315 if (mp->ma_keys == Py_EMPTY_KEYS) {
3316 Py_INCREF(key);
3317 Py_INCREF(defaultobj);
3318 if (insert_to_emptydict(mp, key, hash, defaultobj) < 0) {
3319 return NULL;
3320 }
3321 return defaultobj;
3322 }
3323
3324 if (!PyUnicode_CheckExact(key) && DK_IS_UNICODE(mp->ma_keys)) {
3325 if (insertion_resize(mp, 0) < 0) {
3326 return NULL;
3327 }
3328 }
3329
3330 Py_ssize_t ix = _Py_dict_lookup(mp, key, hash, &value);
3331 if (ix == DKIX_ERROR)
3332 return NULL;
3333
3334 if (ix == DKIX_EMPTY) {
3335 mp->ma_keys->dk_version = 0;
3336 value = defaultobj;
3337 if (mp->ma_keys->dk_usable <= 0) {
3338 if (insertion_resize(mp, 1) < 0) {
3339 return NULL;
3340 }
3341 }
3342 Py_ssize_t hashpos = find_empty_slot(mp->ma_keys, hash);
3343 dictkeys_set_index(mp->ma_keys, hashpos, mp->ma_keys->dk_nentries);
3344 if (DK_IS_UNICODE(mp->ma_keys)) {
3345 assert(PyUnicode_CheckExact(key));
3346 PyDictUnicodeEntry *ep = &DK_UNICODE_ENTRIES(mp->ma_keys)[mp->ma_keys->dk_nentries];
3347 ep->me_key = key;
3348 if (_PyDict_HasSplitTable(mp)) {
3349 Py_ssize_t index = (int)mp->ma_keys->dk_nentries;
3350 assert(index < SHARED_KEYS_MAX_SIZE);
3351 assert(mp->ma_values->values[index] == NULL);
3352 mp->ma_values->values[index] = value;
3353 _PyDictValues_AddToInsertionOrder(mp->ma_values, index);
3354 }
3355 else {
3356 ep->me_value = value;
3357 }
3358 }
3359 else {
3360 PyDictKeyEntry *ep = &DK_ENTRIES(mp->ma_keys)[mp->ma_keys->dk_nentries];
3361 ep->me_key = key;
3362 ep->me_hash = hash;
3363 ep->me_value = value;
3364 }
3365 Py_INCREF(key);
3366 Py_INCREF(value);
3367 MAINTAIN_TRACKING(mp, key, value);
3368 mp->ma_used++;
3369 mp->ma_version_tag = DICT_NEXT_VERSION();
3370 mp->ma_keys->dk_usable--;
3371 mp->ma_keys->dk_nentries++;
3372 assert(mp->ma_keys->dk_usable >= 0);
3373 }
3374 else if (value == NULL) {
3375 value = defaultobj;
3376 assert(_PyDict_HasSplitTable(mp));
3377 assert(mp->ma_values->values[ix] == NULL);
3378 Py_INCREF(value);
3379 MAINTAIN_TRACKING(mp, key, value);
3380 mp->ma_values->values[ix] = value;
3381 _PyDictValues_AddToInsertionOrder(mp->ma_values, ix);
3382 mp->ma_used++;
3383 mp->ma_version_tag = DICT_NEXT_VERSION();
3384 }
3385
3386 ASSERT_CONSISTENT(mp);
3387 return value;
3388 }
3389
3390 /*[clinic input]
3391 dict.setdefault
3392
3393 key: object
3394 default: object = None
3395 /
3396
3397 Insert key with a value of default if key is not in the dictionary.
3398
3399 Return the value for key if key is in the dictionary, else default.
3400 [clinic start generated code]*/
3401
3402 static PyObject *
dict_setdefault_impl(PyDictObject * self,PyObject * key,PyObject * default_value)3403 dict_setdefault_impl(PyDictObject *self, PyObject *key,
3404 PyObject *default_value)
3405 /*[clinic end generated code: output=f8c1101ebf69e220 input=0f063756e815fd9d]*/
3406 {
3407 PyObject *val;
3408
3409 val = PyDict_SetDefault((PyObject *)self, key, default_value);
3410 Py_XINCREF(val);
3411 return val;
3412 }
3413
3414 static PyObject *
dict_clear(PyDictObject * mp,PyObject * Py_UNUSED (ignored))3415 dict_clear(PyDictObject *mp, PyObject *Py_UNUSED(ignored))
3416 {
3417 PyDict_Clear((PyObject *)mp);
3418 Py_RETURN_NONE;
3419 }
3420
3421 /*[clinic input]
3422 dict.pop
3423
3424 key: object
3425 default: object = NULL
3426 /
3427
3428 D.pop(k[,d]) -> v, remove specified key and return the corresponding value.
3429
3430 If the key is not found, return the default if given; otherwise,
3431 raise a KeyError.
3432 [clinic start generated code]*/
3433
3434 static PyObject *
dict_pop_impl(PyDictObject * self,PyObject * key,PyObject * default_value)3435 dict_pop_impl(PyDictObject *self, PyObject *key, PyObject *default_value)
3436 /*[clinic end generated code: output=3abb47b89f24c21c input=e221baa01044c44c]*/
3437 {
3438 return _PyDict_Pop((PyObject*)self, key, default_value);
3439 }
3440
3441 /*[clinic input]
3442 dict.popitem
3443
3444 Remove and return a (key, value) pair as a 2-tuple.
3445
3446 Pairs are returned in LIFO (last-in, first-out) order.
3447 Raises KeyError if the dict is empty.
3448 [clinic start generated code]*/
3449
3450 static PyObject *
dict_popitem_impl(PyDictObject * self)3451 dict_popitem_impl(PyDictObject *self)
3452 /*[clinic end generated code: output=e65fcb04420d230d input=1c38a49f21f64941]*/
3453 {
3454 Py_ssize_t i, j;
3455 PyObject *res;
3456
3457 /* Allocate the result tuple before checking the size. Believe it
3458 * or not, this allocation could trigger a garbage collection which
3459 * could empty the dict, so if we checked the size first and that
3460 * happened, the result would be an infinite loop (searching for an
3461 * entry that no longer exists). Note that the usual popitem()
3462 * idiom is "while d: k, v = d.popitem()". so needing to throw the
3463 * tuple away if the dict *is* empty isn't a significant
3464 * inefficiency -- possible, but unlikely in practice.
3465 */
3466 res = PyTuple_New(2);
3467 if (res == NULL)
3468 return NULL;
3469 if (self->ma_used == 0) {
3470 Py_DECREF(res);
3471 PyErr_SetString(PyExc_KeyError, "popitem(): dictionary is empty");
3472 return NULL;
3473 }
3474 /* Convert split table to combined table */
3475 if (self->ma_keys->dk_kind == DICT_KEYS_SPLIT) {
3476 if (dictresize(self, DK_LOG_SIZE(self->ma_keys), 1)) {
3477 Py_DECREF(res);
3478 return NULL;
3479 }
3480 }
3481 self->ma_keys->dk_version = 0;
3482
3483 /* Pop last item */
3484 PyObject *key, *value;
3485 Py_hash_t hash;
3486 if (DK_IS_UNICODE(self->ma_keys)) {
3487 PyDictUnicodeEntry *ep0 = DK_UNICODE_ENTRIES(self->ma_keys);
3488 i = self->ma_keys->dk_nentries - 1;
3489 while (i >= 0 && ep0[i].me_value == NULL) {
3490 i--;
3491 }
3492 assert(i >= 0);
3493
3494 key = ep0[i].me_key;
3495 hash = unicode_get_hash(key);
3496 value = ep0[i].me_value;
3497 ep0[i].me_key = NULL;
3498 ep0[i].me_value = NULL;
3499 }
3500 else {
3501 PyDictKeyEntry *ep0 = DK_ENTRIES(self->ma_keys);
3502 i = self->ma_keys->dk_nentries - 1;
3503 while (i >= 0 && ep0[i].me_value == NULL) {
3504 i--;
3505 }
3506 assert(i >= 0);
3507
3508 key = ep0[i].me_key;
3509 hash = ep0[i].me_hash;
3510 value = ep0[i].me_value;
3511 ep0[i].me_key = NULL;
3512 ep0[i].me_hash = -1;
3513 ep0[i].me_value = NULL;
3514 }
3515
3516 j = lookdict_index(self->ma_keys, hash, i);
3517 assert(j >= 0);
3518 assert(dictkeys_get_index(self->ma_keys, j) == i);
3519 dictkeys_set_index(self->ma_keys, j, DKIX_DUMMY);
3520
3521 PyTuple_SET_ITEM(res, 0, key);
3522 PyTuple_SET_ITEM(res, 1, value);
3523 /* We can't dk_usable++ since there is DKIX_DUMMY in indices */
3524 self->ma_keys->dk_nentries = i;
3525 self->ma_used--;
3526 self->ma_version_tag = DICT_NEXT_VERSION();
3527 ASSERT_CONSISTENT(self);
3528 return res;
3529 }
3530
3531 static int
dict_traverse(PyObject * op,visitproc visit,void * arg)3532 dict_traverse(PyObject *op, visitproc visit, void *arg)
3533 {
3534 PyDictObject *mp = (PyDictObject *)op;
3535 PyDictKeysObject *keys = mp->ma_keys;
3536 Py_ssize_t i, n = keys->dk_nentries;
3537
3538 if (DK_IS_UNICODE(keys)) {
3539 if (mp->ma_values != NULL) {
3540 for (i = 0; i < n; i++) {
3541 Py_VISIT(mp->ma_values->values[i]);
3542 }
3543 }
3544 else {
3545 PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(keys);
3546 for (i = 0; i < n; i++) {
3547 Py_VISIT(entries[i].me_value);
3548 }
3549 }
3550 }
3551 else {
3552 PyDictKeyEntry *entries = DK_ENTRIES(keys);
3553 for (i = 0; i < n; i++) {
3554 if (entries[i].me_value != NULL) {
3555 Py_VISIT(entries[i].me_value);
3556 Py_VISIT(entries[i].me_key);
3557 }
3558 }
3559 }
3560 return 0;
3561 }
3562
3563 static int
dict_tp_clear(PyObject * op)3564 dict_tp_clear(PyObject *op)
3565 {
3566 PyDict_Clear(op);
3567 return 0;
3568 }
3569
3570 static PyObject *dictiter_new(PyDictObject *, PyTypeObject *);
3571
3572 Py_ssize_t
_PyDict_SizeOf(PyDictObject * mp)3573 _PyDict_SizeOf(PyDictObject *mp)
3574 {
3575 Py_ssize_t res;
3576
3577 res = _PyObject_SIZE(Py_TYPE(mp));
3578 if (mp->ma_values) {
3579 res += shared_keys_usable_size(mp->ma_keys) * sizeof(PyObject*);
3580 }
3581 /* If the dictionary is split, the keys portion is accounted-for
3582 in the type object. */
3583 if (mp->ma_keys->dk_refcnt == 1) {
3584 res += _PyDict_KeysSize(mp->ma_keys);
3585 }
3586 return res;
3587 }
3588
3589 Py_ssize_t
_PyDict_KeysSize(PyDictKeysObject * keys)3590 _PyDict_KeysSize(PyDictKeysObject *keys)
3591 {
3592 size_t es = keys->dk_kind == DICT_KEYS_GENERAL
3593 ? sizeof(PyDictKeyEntry) : sizeof(PyDictUnicodeEntry);
3594 return (sizeof(PyDictKeysObject)
3595 + ((size_t)1 << keys->dk_log2_index_bytes)
3596 + USABLE_FRACTION(DK_SIZE(keys)) * es);
3597 }
3598
3599 static PyObject *
dict_sizeof(PyDictObject * mp,PyObject * Py_UNUSED (ignored))3600 dict_sizeof(PyDictObject *mp, PyObject *Py_UNUSED(ignored))
3601 {
3602 return PyLong_FromSsize_t(_PyDict_SizeOf(mp));
3603 }
3604
3605 static PyObject *
dict_or(PyObject * self,PyObject * other)3606 dict_or(PyObject *self, PyObject *other)
3607 {
3608 if (!PyDict_Check(self) || !PyDict_Check(other)) {
3609 Py_RETURN_NOTIMPLEMENTED;
3610 }
3611 PyObject *new = PyDict_Copy(self);
3612 if (new == NULL) {
3613 return NULL;
3614 }
3615 if (dict_update_arg(new, other)) {
3616 Py_DECREF(new);
3617 return NULL;
3618 }
3619 return new;
3620 }
3621
3622 static PyObject *
dict_ior(PyObject * self,PyObject * other)3623 dict_ior(PyObject *self, PyObject *other)
3624 {
3625 if (dict_update_arg(self, other)) {
3626 return NULL;
3627 }
3628 Py_INCREF(self);
3629 return self;
3630 }
3631
3632 PyDoc_STRVAR(getitem__doc__, "x.__getitem__(y) <==> x[y]");
3633
3634 PyDoc_STRVAR(sizeof__doc__,
3635 "D.__sizeof__() -> size of D in memory, in bytes");
3636
3637 PyDoc_STRVAR(update__doc__,
3638 "D.update([E, ]**F) -> None. Update D from dict/iterable E and F.\n\
3639 If E is present and has a .keys() method, then does: for k in E: D[k] = E[k]\n\
3640 If E is present and lacks a .keys() method, then does: for k, v in E: D[k] = v\n\
3641 In either case, this is followed by: for k in F: D[k] = F[k]");
3642
3643 PyDoc_STRVAR(clear__doc__,
3644 "D.clear() -> None. Remove all items from D.");
3645
3646 PyDoc_STRVAR(copy__doc__,
3647 "D.copy() -> a shallow copy of D");
3648
3649 /* Forward */
3650 static PyObject *dictkeys_new(PyObject *, PyObject *);
3651 static PyObject *dictitems_new(PyObject *, PyObject *);
3652 static PyObject *dictvalues_new(PyObject *, PyObject *);
3653
3654 PyDoc_STRVAR(keys__doc__,
3655 "D.keys() -> a set-like object providing a view on D's keys");
3656 PyDoc_STRVAR(items__doc__,
3657 "D.items() -> a set-like object providing a view on D's items");
3658 PyDoc_STRVAR(values__doc__,
3659 "D.values() -> an object providing a view on D's values");
3660
3661 static PyMethodDef mapp_methods[] = {
3662 DICT___CONTAINS___METHODDEF
3663 {"__getitem__", _PyCFunction_CAST(dict_subscript), METH_O | METH_COEXIST,
3664 getitem__doc__},
3665 {"__sizeof__", _PyCFunction_CAST(dict_sizeof), METH_NOARGS,
3666 sizeof__doc__},
3667 DICT_GET_METHODDEF
3668 DICT_SETDEFAULT_METHODDEF
3669 DICT_POP_METHODDEF
3670 DICT_POPITEM_METHODDEF
3671 {"keys", dictkeys_new, METH_NOARGS,
3672 keys__doc__},
3673 {"items", dictitems_new, METH_NOARGS,
3674 items__doc__},
3675 {"values", dictvalues_new, METH_NOARGS,
3676 values__doc__},
3677 {"update", _PyCFunction_CAST(dict_update), METH_VARARGS | METH_KEYWORDS,
3678 update__doc__},
3679 DICT_FROMKEYS_METHODDEF
3680 {"clear", (PyCFunction)dict_clear, METH_NOARGS,
3681 clear__doc__},
3682 {"copy", (PyCFunction)dict_copy, METH_NOARGS,
3683 copy__doc__},
3684 DICT___REVERSED___METHODDEF
3685 {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")},
3686 {NULL, NULL} /* sentinel */
3687 };
3688
3689 /* Return 1 if `key` is in dict `op`, 0 if not, and -1 on error. */
3690 int
PyDict_Contains(PyObject * op,PyObject * key)3691 PyDict_Contains(PyObject *op, PyObject *key)
3692 {
3693 Py_hash_t hash;
3694 Py_ssize_t ix;
3695 PyDictObject *mp = (PyDictObject *)op;
3696 PyObject *value;
3697
3698 if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
3699 hash = PyObject_Hash(key);
3700 if (hash == -1)
3701 return -1;
3702 }
3703 ix = _Py_dict_lookup(mp, key, hash, &value);
3704 if (ix == DKIX_ERROR)
3705 return -1;
3706 return (ix != DKIX_EMPTY && value != NULL);
3707 }
3708
3709 /* Internal version of PyDict_Contains used when the hash value is already known */
3710 int
_PyDict_Contains_KnownHash(PyObject * op,PyObject * key,Py_hash_t hash)3711 _PyDict_Contains_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)
3712 {
3713 PyDictObject *mp = (PyDictObject *)op;
3714 PyObject *value;
3715 Py_ssize_t ix;
3716
3717 ix = _Py_dict_lookup(mp, key, hash, &value);
3718 if (ix == DKIX_ERROR)
3719 return -1;
3720 return (ix != DKIX_EMPTY && value != NULL);
3721 }
3722
3723 int
_PyDict_ContainsId(PyObject * op,_Py_Identifier * key)3724 _PyDict_ContainsId(PyObject *op, _Py_Identifier *key)
3725 {
3726 PyObject *kv = _PyUnicode_FromId(key); /* borrowed */
3727 if (kv == NULL) {
3728 return -1;
3729 }
3730 return PyDict_Contains(op, kv);
3731 }
3732
3733 /* Hack to implement "key in dict" */
3734 static PySequenceMethods dict_as_sequence = {
3735 0, /* sq_length */
3736 0, /* sq_concat */
3737 0, /* sq_repeat */
3738 0, /* sq_item */
3739 0, /* sq_slice */
3740 0, /* sq_ass_item */
3741 0, /* sq_ass_slice */
3742 PyDict_Contains, /* sq_contains */
3743 0, /* sq_inplace_concat */
3744 0, /* sq_inplace_repeat */
3745 };
3746
3747 static PyNumberMethods dict_as_number = {
3748 .nb_or = dict_or,
3749 .nb_inplace_or = dict_ior,
3750 };
3751
3752 static PyObject *
dict_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3753 dict_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3754 {
3755 assert(type != NULL);
3756 assert(type->tp_alloc != NULL);
3757 // dict subclasses must implement the GC protocol
3758 assert(_PyType_IS_GC(type));
3759
3760 PyObject *self = type->tp_alloc(type, 0);
3761 if (self == NULL) {
3762 return NULL;
3763 }
3764 PyDictObject *d = (PyDictObject *)self;
3765
3766 d->ma_used = 0;
3767 d->ma_version_tag = DICT_NEXT_VERSION();
3768 dictkeys_incref(Py_EMPTY_KEYS);
3769 d->ma_keys = Py_EMPTY_KEYS;
3770 d->ma_values = NULL;
3771 ASSERT_CONSISTENT(d);
3772
3773 if (type != &PyDict_Type) {
3774 // Don't track if a subclass tp_alloc is PyType_GenericAlloc()
3775 if (!_PyObject_GC_IS_TRACKED(d)) {
3776 _PyObject_GC_TRACK(d);
3777 }
3778 }
3779 else {
3780 // _PyType_AllocNoTrack() does not track the created object
3781 assert(!_PyObject_GC_IS_TRACKED(d));
3782 }
3783 return self;
3784 }
3785
3786 static int
dict_init(PyObject * self,PyObject * args,PyObject * kwds)3787 dict_init(PyObject *self, PyObject *args, PyObject *kwds)
3788 {
3789 return dict_update_common(self, args, kwds, "dict");
3790 }
3791
3792 static PyObject *
dict_vectorcall(PyObject * type,PyObject * const * args,size_t nargsf,PyObject * kwnames)3793 dict_vectorcall(PyObject *type, PyObject * const*args,
3794 size_t nargsf, PyObject *kwnames)
3795 {
3796 Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
3797 if (!_PyArg_CheckPositional("dict", nargs, 0, 1)) {
3798 return NULL;
3799 }
3800
3801 PyObject *self = dict_new(_PyType_CAST(type), NULL, NULL);
3802 if (self == NULL) {
3803 return NULL;
3804 }
3805 if (nargs == 1) {
3806 if (dict_update_arg(self, args[0]) < 0) {
3807 Py_DECREF(self);
3808 return NULL;
3809 }
3810 args++;
3811 }
3812 if (kwnames != NULL) {
3813 for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(kwnames); i++) {
3814 if (PyDict_SetItem(self, PyTuple_GET_ITEM(kwnames, i), args[i]) < 0) {
3815 Py_DECREF(self);
3816 return NULL;
3817 }
3818 }
3819 }
3820 return self;
3821 }
3822
3823 static PyObject *
dict_iter(PyDictObject * dict)3824 dict_iter(PyDictObject *dict)
3825 {
3826 return dictiter_new(dict, &PyDictIterKey_Type);
3827 }
3828
3829 PyDoc_STRVAR(dictionary_doc,
3830 "dict() -> new empty dictionary\n"
3831 "dict(mapping) -> new dictionary initialized from a mapping object's\n"
3832 " (key, value) pairs\n"
3833 "dict(iterable) -> new dictionary initialized as if via:\n"
3834 " d = {}\n"
3835 " for k, v in iterable:\n"
3836 " d[k] = v\n"
3837 "dict(**kwargs) -> new dictionary initialized with the name=value pairs\n"
3838 " in the keyword argument list. For example: dict(one=1, two=2)");
3839
3840 PyTypeObject PyDict_Type = {
3841 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3842 "dict",
3843 sizeof(PyDictObject),
3844 0,
3845 (destructor)dict_dealloc, /* tp_dealloc */
3846 0, /* tp_vectorcall_offset */
3847 0, /* tp_getattr */
3848 0, /* tp_setattr */
3849 0, /* tp_as_async */
3850 (reprfunc)dict_repr, /* tp_repr */
3851 &dict_as_number, /* tp_as_number */
3852 &dict_as_sequence, /* tp_as_sequence */
3853 &dict_as_mapping, /* tp_as_mapping */
3854 PyObject_HashNotImplemented, /* tp_hash */
3855 0, /* tp_call */
3856 0, /* tp_str */
3857 PyObject_GenericGetAttr, /* tp_getattro */
3858 0, /* tp_setattro */
3859 0, /* tp_as_buffer */
3860 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
3861 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_DICT_SUBCLASS |
3862 _Py_TPFLAGS_MATCH_SELF | Py_TPFLAGS_MAPPING, /* tp_flags */
3863 dictionary_doc, /* tp_doc */
3864 dict_traverse, /* tp_traverse */
3865 dict_tp_clear, /* tp_clear */
3866 dict_richcompare, /* tp_richcompare */
3867 0, /* tp_weaklistoffset */
3868 (getiterfunc)dict_iter, /* tp_iter */
3869 0, /* tp_iternext */
3870 mapp_methods, /* tp_methods */
3871 0, /* tp_members */
3872 0, /* tp_getset */
3873 0, /* tp_base */
3874 0, /* tp_dict */
3875 0, /* tp_descr_get */
3876 0, /* tp_descr_set */
3877 0, /* tp_dictoffset */
3878 dict_init, /* tp_init */
3879 _PyType_AllocNoTrack, /* tp_alloc */
3880 dict_new, /* tp_new */
3881 PyObject_GC_Del, /* tp_free */
3882 .tp_vectorcall = dict_vectorcall,
3883 };
3884
3885 /* For backward compatibility with old dictionary interface */
3886
3887 PyObject *
PyDict_GetItemString(PyObject * v,const char * key)3888 PyDict_GetItemString(PyObject *v, const char *key)
3889 {
3890 PyObject *kv, *rv;
3891 kv = PyUnicode_FromString(key);
3892 if (kv == NULL) {
3893 PyErr_Clear();
3894 return NULL;
3895 }
3896 rv = PyDict_GetItem(v, kv);
3897 Py_DECREF(kv);
3898 return rv;
3899 }
3900
3901 int
_PyDict_SetItemId(PyObject * v,_Py_Identifier * key,PyObject * item)3902 _PyDict_SetItemId(PyObject *v, _Py_Identifier *key, PyObject *item)
3903 {
3904 PyObject *kv;
3905 kv = _PyUnicode_FromId(key); /* borrowed */
3906 if (kv == NULL)
3907 return -1;
3908 return PyDict_SetItem(v, kv, item);
3909 }
3910
3911 int
PyDict_SetItemString(PyObject * v,const char * key,PyObject * item)3912 PyDict_SetItemString(PyObject *v, const char *key, PyObject *item)
3913 {
3914 PyObject *kv;
3915 int err;
3916 kv = PyUnicode_FromString(key);
3917 if (kv == NULL)
3918 return -1;
3919 PyUnicode_InternInPlace(&kv); /* XXX Should we really? */
3920 err = PyDict_SetItem(v, kv, item);
3921 Py_DECREF(kv);
3922 return err;
3923 }
3924
3925 int
_PyDict_DelItemId(PyObject * v,_Py_Identifier * key)3926 _PyDict_DelItemId(PyObject *v, _Py_Identifier *key)
3927 {
3928 PyObject *kv = _PyUnicode_FromId(key); /* borrowed */
3929 if (kv == NULL)
3930 return -1;
3931 return PyDict_DelItem(v, kv);
3932 }
3933
3934 int
PyDict_DelItemString(PyObject * v,const char * key)3935 PyDict_DelItemString(PyObject *v, const char *key)
3936 {
3937 PyObject *kv;
3938 int err;
3939 kv = PyUnicode_FromString(key);
3940 if (kv == NULL)
3941 return -1;
3942 err = PyDict_DelItem(v, kv);
3943 Py_DECREF(kv);
3944 return err;
3945 }
3946
3947 /* Dictionary iterator types */
3948
3949 typedef struct {
3950 PyObject_HEAD
3951 PyDictObject *di_dict; /* Set to NULL when iterator is exhausted */
3952 Py_ssize_t di_used;
3953 Py_ssize_t di_pos;
3954 PyObject* di_result; /* reusable result tuple for iteritems */
3955 Py_ssize_t len;
3956 } dictiterobject;
3957
3958 static PyObject *
dictiter_new(PyDictObject * dict,PyTypeObject * itertype)3959 dictiter_new(PyDictObject *dict, PyTypeObject *itertype)
3960 {
3961 dictiterobject *di;
3962 di = PyObject_GC_New(dictiterobject, itertype);
3963 if (di == NULL) {
3964 return NULL;
3965 }
3966 Py_INCREF(dict);
3967 di->di_dict = dict;
3968 di->di_used = dict->ma_used;
3969 di->len = dict->ma_used;
3970 if (itertype == &PyDictRevIterKey_Type ||
3971 itertype == &PyDictRevIterItem_Type ||
3972 itertype == &PyDictRevIterValue_Type) {
3973 if (dict->ma_values) {
3974 di->di_pos = dict->ma_used - 1;
3975 }
3976 else {
3977 di->di_pos = dict->ma_keys->dk_nentries - 1;
3978 }
3979 }
3980 else {
3981 di->di_pos = 0;
3982 }
3983 if (itertype == &PyDictIterItem_Type ||
3984 itertype == &PyDictRevIterItem_Type) {
3985 di->di_result = PyTuple_Pack(2, Py_None, Py_None);
3986 if (di->di_result == NULL) {
3987 Py_DECREF(di);
3988 return NULL;
3989 }
3990 }
3991 else {
3992 di->di_result = NULL;
3993 }
3994 _PyObject_GC_TRACK(di);
3995 return (PyObject *)di;
3996 }
3997
3998 static void
dictiter_dealloc(dictiterobject * di)3999 dictiter_dealloc(dictiterobject *di)
4000 {
4001 /* bpo-31095: UnTrack is needed before calling any callbacks */
4002 _PyObject_GC_UNTRACK(di);
4003 Py_XDECREF(di->di_dict);
4004 Py_XDECREF(di->di_result);
4005 PyObject_GC_Del(di);
4006 }
4007
4008 static int
dictiter_traverse(dictiterobject * di,visitproc visit,void * arg)4009 dictiter_traverse(dictiterobject *di, visitproc visit, void *arg)
4010 {
4011 Py_VISIT(di->di_dict);
4012 Py_VISIT(di->di_result);
4013 return 0;
4014 }
4015
4016 static PyObject *
dictiter_len(dictiterobject * di,PyObject * Py_UNUSED (ignored))4017 dictiter_len(dictiterobject *di, PyObject *Py_UNUSED(ignored))
4018 {
4019 Py_ssize_t len = 0;
4020 if (di->di_dict != NULL && di->di_used == di->di_dict->ma_used)
4021 len = di->len;
4022 return PyLong_FromSize_t(len);
4023 }
4024
4025 PyDoc_STRVAR(length_hint_doc,
4026 "Private method returning an estimate of len(list(it)).");
4027
4028 static PyObject *
4029 dictiter_reduce(dictiterobject *di, PyObject *Py_UNUSED(ignored));
4030
4031 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
4032
4033 static PyMethodDef dictiter_methods[] = {
4034 {"__length_hint__", _PyCFunction_CAST(dictiter_len), METH_NOARGS,
4035 length_hint_doc},
4036 {"__reduce__", _PyCFunction_CAST(dictiter_reduce), METH_NOARGS,
4037 reduce_doc},
4038 {NULL, NULL} /* sentinel */
4039 };
4040
4041 static PyObject*
dictiter_iternextkey(dictiterobject * di)4042 dictiter_iternextkey(dictiterobject *di)
4043 {
4044 PyObject *key;
4045 Py_ssize_t i;
4046 PyDictKeysObject *k;
4047 PyDictObject *d = di->di_dict;
4048
4049 if (d == NULL)
4050 return NULL;
4051 assert (PyDict_Check(d));
4052
4053 if (di->di_used != d->ma_used) {
4054 PyErr_SetString(PyExc_RuntimeError,
4055 "dictionary changed size during iteration");
4056 di->di_used = -1; /* Make this state sticky */
4057 return NULL;
4058 }
4059
4060 i = di->di_pos;
4061 k = d->ma_keys;
4062 assert(i >= 0);
4063 if (d->ma_values) {
4064 if (i >= d->ma_used)
4065 goto fail;
4066 int index = get_index_from_order(d, i);
4067 key = DK_UNICODE_ENTRIES(k)[index].me_key;
4068 assert(d->ma_values->values[index] != NULL);
4069 }
4070 else {
4071 Py_ssize_t n = k->dk_nentries;
4072 if (DK_IS_UNICODE(k)) {
4073 PyDictUnicodeEntry *entry_ptr = &DK_UNICODE_ENTRIES(k)[i];
4074 while (i < n && entry_ptr->me_value == NULL) {
4075 entry_ptr++;
4076 i++;
4077 }
4078 if (i >= n)
4079 goto fail;
4080 key = entry_ptr->me_key;
4081 }
4082 else {
4083 PyDictKeyEntry *entry_ptr = &DK_ENTRIES(k)[i];
4084 while (i < n && entry_ptr->me_value == NULL) {
4085 entry_ptr++;
4086 i++;
4087 }
4088 if (i >= n)
4089 goto fail;
4090 key = entry_ptr->me_key;
4091 }
4092 }
4093 // We found an element (key), but did not expect it
4094 if (di->len == 0) {
4095 PyErr_SetString(PyExc_RuntimeError,
4096 "dictionary keys changed during iteration");
4097 goto fail;
4098 }
4099 di->di_pos = i+1;
4100 di->len--;
4101 Py_INCREF(key);
4102 return key;
4103
4104 fail:
4105 di->di_dict = NULL;
4106 Py_DECREF(d);
4107 return NULL;
4108 }
4109
4110 PyTypeObject PyDictIterKey_Type = {
4111 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4112 "dict_keyiterator", /* tp_name */
4113 sizeof(dictiterobject), /* tp_basicsize */
4114 0, /* tp_itemsize */
4115 /* methods */
4116 (destructor)dictiter_dealloc, /* tp_dealloc */
4117 0, /* tp_vectorcall_offset */
4118 0, /* tp_getattr */
4119 0, /* tp_setattr */
4120 0, /* tp_as_async */
4121 0, /* tp_repr */
4122 0, /* tp_as_number */
4123 0, /* tp_as_sequence */
4124 0, /* tp_as_mapping */
4125 0, /* tp_hash */
4126 0, /* tp_call */
4127 0, /* tp_str */
4128 PyObject_GenericGetAttr, /* tp_getattro */
4129 0, /* tp_setattro */
4130 0, /* tp_as_buffer */
4131 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
4132 0, /* tp_doc */
4133 (traverseproc)dictiter_traverse, /* tp_traverse */
4134 0, /* tp_clear */
4135 0, /* tp_richcompare */
4136 0, /* tp_weaklistoffset */
4137 PyObject_SelfIter, /* tp_iter */
4138 (iternextfunc)dictiter_iternextkey, /* tp_iternext */
4139 dictiter_methods, /* tp_methods */
4140 0,
4141 };
4142
4143 static PyObject *
dictiter_iternextvalue(dictiterobject * di)4144 dictiter_iternextvalue(dictiterobject *di)
4145 {
4146 PyObject *value;
4147 Py_ssize_t i;
4148 PyDictObject *d = di->di_dict;
4149
4150 if (d == NULL)
4151 return NULL;
4152 assert (PyDict_Check(d));
4153
4154 if (di->di_used != d->ma_used) {
4155 PyErr_SetString(PyExc_RuntimeError,
4156 "dictionary changed size during iteration");
4157 di->di_used = -1; /* Make this state sticky */
4158 return NULL;
4159 }
4160
4161 i = di->di_pos;
4162 assert(i >= 0);
4163 if (d->ma_values) {
4164 if (i >= d->ma_used)
4165 goto fail;
4166 int index = get_index_from_order(d, i);
4167 value = d->ma_values->values[index];
4168 assert(value != NULL);
4169 }
4170 else {
4171 Py_ssize_t n = d->ma_keys->dk_nentries;
4172 if (DK_IS_UNICODE(d->ma_keys)) {
4173 PyDictUnicodeEntry *entry_ptr = &DK_UNICODE_ENTRIES(d->ma_keys)[i];
4174 while (i < n && entry_ptr->me_value == NULL) {
4175 entry_ptr++;
4176 i++;
4177 }
4178 if (i >= n)
4179 goto fail;
4180 value = entry_ptr->me_value;
4181 }
4182 else {
4183 PyDictKeyEntry *entry_ptr = &DK_ENTRIES(d->ma_keys)[i];
4184 while (i < n && entry_ptr->me_value == NULL) {
4185 entry_ptr++;
4186 i++;
4187 }
4188 if (i >= n)
4189 goto fail;
4190 value = entry_ptr->me_value;
4191 }
4192 }
4193 // We found an element, but did not expect it
4194 if (di->len == 0) {
4195 PyErr_SetString(PyExc_RuntimeError,
4196 "dictionary keys changed during iteration");
4197 goto fail;
4198 }
4199 di->di_pos = i+1;
4200 di->len--;
4201 Py_INCREF(value);
4202 return value;
4203
4204 fail:
4205 di->di_dict = NULL;
4206 Py_DECREF(d);
4207 return NULL;
4208 }
4209
4210 PyTypeObject PyDictIterValue_Type = {
4211 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4212 "dict_valueiterator", /* tp_name */
4213 sizeof(dictiterobject), /* tp_basicsize */
4214 0, /* tp_itemsize */
4215 /* methods */
4216 (destructor)dictiter_dealloc, /* tp_dealloc */
4217 0, /* tp_vectorcall_offset */
4218 0, /* tp_getattr */
4219 0, /* tp_setattr */
4220 0, /* tp_as_async */
4221 0, /* tp_repr */
4222 0, /* tp_as_number */
4223 0, /* tp_as_sequence */
4224 0, /* tp_as_mapping */
4225 0, /* tp_hash */
4226 0, /* tp_call */
4227 0, /* tp_str */
4228 PyObject_GenericGetAttr, /* tp_getattro */
4229 0, /* tp_setattro */
4230 0, /* tp_as_buffer */
4231 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
4232 0, /* tp_doc */
4233 (traverseproc)dictiter_traverse, /* tp_traverse */
4234 0, /* tp_clear */
4235 0, /* tp_richcompare */
4236 0, /* tp_weaklistoffset */
4237 PyObject_SelfIter, /* tp_iter */
4238 (iternextfunc)dictiter_iternextvalue, /* tp_iternext */
4239 dictiter_methods, /* tp_methods */
4240 0,
4241 };
4242
4243 static PyObject *
dictiter_iternextitem(dictiterobject * di)4244 dictiter_iternextitem(dictiterobject *di)
4245 {
4246 PyObject *key, *value, *result;
4247 Py_ssize_t i;
4248 PyDictObject *d = di->di_dict;
4249
4250 if (d == NULL)
4251 return NULL;
4252 assert (PyDict_Check(d));
4253
4254 if (di->di_used != d->ma_used) {
4255 PyErr_SetString(PyExc_RuntimeError,
4256 "dictionary changed size during iteration");
4257 di->di_used = -1; /* Make this state sticky */
4258 return NULL;
4259 }
4260
4261 i = di->di_pos;
4262 assert(i >= 0);
4263 if (d->ma_values) {
4264 if (i >= d->ma_used)
4265 goto fail;
4266 int index = get_index_from_order(d, i);
4267 key = DK_UNICODE_ENTRIES(d->ma_keys)[index].me_key;
4268 value = d->ma_values->values[index];
4269 assert(value != NULL);
4270 }
4271 else {
4272 Py_ssize_t n = d->ma_keys->dk_nentries;
4273 if (DK_IS_UNICODE(d->ma_keys)) {
4274 PyDictUnicodeEntry *entry_ptr = &DK_UNICODE_ENTRIES(d->ma_keys)[i];
4275 while (i < n && entry_ptr->me_value == NULL) {
4276 entry_ptr++;
4277 i++;
4278 }
4279 if (i >= n)
4280 goto fail;
4281 key = entry_ptr->me_key;
4282 value = entry_ptr->me_value;
4283 }
4284 else {
4285 PyDictKeyEntry *entry_ptr = &DK_ENTRIES(d->ma_keys)[i];
4286 while (i < n && entry_ptr->me_value == NULL) {
4287 entry_ptr++;
4288 i++;
4289 }
4290 if (i >= n)
4291 goto fail;
4292 key = entry_ptr->me_key;
4293 value = entry_ptr->me_value;
4294 }
4295 }
4296 // We found an element, but did not expect it
4297 if (di->len == 0) {
4298 PyErr_SetString(PyExc_RuntimeError,
4299 "dictionary keys changed during iteration");
4300 goto fail;
4301 }
4302 di->di_pos = i+1;
4303 di->len--;
4304 Py_INCREF(key);
4305 Py_INCREF(value);
4306 result = di->di_result;
4307 if (Py_REFCNT(result) == 1) {
4308 PyObject *oldkey = PyTuple_GET_ITEM(result, 0);
4309 PyObject *oldvalue = PyTuple_GET_ITEM(result, 1);
4310 PyTuple_SET_ITEM(result, 0, key); /* steals reference */
4311 PyTuple_SET_ITEM(result, 1, value); /* steals reference */
4312 Py_INCREF(result);
4313 Py_DECREF(oldkey);
4314 Py_DECREF(oldvalue);
4315 // bpo-42536: The GC may have untracked this result tuple. Since we're
4316 // recycling it, make sure it's tracked again:
4317 if (!_PyObject_GC_IS_TRACKED(result)) {
4318 _PyObject_GC_TRACK(result);
4319 }
4320 }
4321 else {
4322 result = PyTuple_New(2);
4323 if (result == NULL)
4324 return NULL;
4325 PyTuple_SET_ITEM(result, 0, key); /* steals reference */
4326 PyTuple_SET_ITEM(result, 1, value); /* steals reference */
4327 }
4328 return result;
4329
4330 fail:
4331 di->di_dict = NULL;
4332 Py_DECREF(d);
4333 return NULL;
4334 }
4335
4336 PyTypeObject PyDictIterItem_Type = {
4337 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4338 "dict_itemiterator", /* tp_name */
4339 sizeof(dictiterobject), /* tp_basicsize */
4340 0, /* tp_itemsize */
4341 /* methods */
4342 (destructor)dictiter_dealloc, /* tp_dealloc */
4343 0, /* tp_vectorcall_offset */
4344 0, /* tp_getattr */
4345 0, /* tp_setattr */
4346 0, /* tp_as_async */
4347 0, /* tp_repr */
4348 0, /* tp_as_number */
4349 0, /* tp_as_sequence */
4350 0, /* tp_as_mapping */
4351 0, /* tp_hash */
4352 0, /* tp_call */
4353 0, /* tp_str */
4354 PyObject_GenericGetAttr, /* tp_getattro */
4355 0, /* tp_setattro */
4356 0, /* tp_as_buffer */
4357 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
4358 0, /* tp_doc */
4359 (traverseproc)dictiter_traverse, /* tp_traverse */
4360 0, /* tp_clear */
4361 0, /* tp_richcompare */
4362 0, /* tp_weaklistoffset */
4363 PyObject_SelfIter, /* tp_iter */
4364 (iternextfunc)dictiter_iternextitem, /* tp_iternext */
4365 dictiter_methods, /* tp_methods */
4366 0,
4367 };
4368
4369
4370 /* dictreviter */
4371
4372 static PyObject *
dictreviter_iternext(dictiterobject * di)4373 dictreviter_iternext(dictiterobject *di)
4374 {
4375 PyDictObject *d = di->di_dict;
4376
4377 if (d == NULL) {
4378 return NULL;
4379 }
4380 assert (PyDict_Check(d));
4381
4382 if (di->di_used != d->ma_used) {
4383 PyErr_SetString(PyExc_RuntimeError,
4384 "dictionary changed size during iteration");
4385 di->di_used = -1; /* Make this state sticky */
4386 return NULL;
4387 }
4388
4389 Py_ssize_t i = di->di_pos;
4390 PyDictKeysObject *k = d->ma_keys;
4391 PyObject *key, *value, *result;
4392
4393 if (i < 0) {
4394 goto fail;
4395 }
4396 if (d->ma_values) {
4397 int index = get_index_from_order(d, i);
4398 key = DK_UNICODE_ENTRIES(k)[index].me_key;
4399 value = d->ma_values->values[index];
4400 assert (value != NULL);
4401 }
4402 else {
4403 if (DK_IS_UNICODE(k)) {
4404 PyDictUnicodeEntry *entry_ptr = &DK_UNICODE_ENTRIES(k)[i];
4405 while (entry_ptr->me_value == NULL) {
4406 if (--i < 0) {
4407 goto fail;
4408 }
4409 entry_ptr--;
4410 }
4411 key = entry_ptr->me_key;
4412 value = entry_ptr->me_value;
4413 }
4414 else {
4415 PyDictKeyEntry *entry_ptr = &DK_ENTRIES(k)[i];
4416 while (entry_ptr->me_value == NULL) {
4417 if (--i < 0) {
4418 goto fail;
4419 }
4420 entry_ptr--;
4421 }
4422 key = entry_ptr->me_key;
4423 value = entry_ptr->me_value;
4424 }
4425 }
4426 di->di_pos = i-1;
4427 di->len--;
4428
4429 if (Py_IS_TYPE(di, &PyDictRevIterKey_Type)) {
4430 Py_INCREF(key);
4431 return key;
4432 }
4433 else if (Py_IS_TYPE(di, &PyDictRevIterValue_Type)) {
4434 Py_INCREF(value);
4435 return value;
4436 }
4437 else if (Py_IS_TYPE(di, &PyDictRevIterItem_Type)) {
4438 Py_INCREF(key);
4439 Py_INCREF(value);
4440 result = di->di_result;
4441 if (Py_REFCNT(result) == 1) {
4442 PyObject *oldkey = PyTuple_GET_ITEM(result, 0);
4443 PyObject *oldvalue = PyTuple_GET_ITEM(result, 1);
4444 PyTuple_SET_ITEM(result, 0, key); /* steals reference */
4445 PyTuple_SET_ITEM(result, 1, value); /* steals reference */
4446 Py_INCREF(result);
4447 Py_DECREF(oldkey);
4448 Py_DECREF(oldvalue);
4449 // bpo-42536: The GC may have untracked this result tuple. Since
4450 // we're recycling it, make sure it's tracked again:
4451 if (!_PyObject_GC_IS_TRACKED(result)) {
4452 _PyObject_GC_TRACK(result);
4453 }
4454 }
4455 else {
4456 result = PyTuple_New(2);
4457 if (result == NULL) {
4458 return NULL;
4459 }
4460 PyTuple_SET_ITEM(result, 0, key); /* steals reference */
4461 PyTuple_SET_ITEM(result, 1, value); /* steals reference */
4462 }
4463 return result;
4464 }
4465 else {
4466 Py_UNREACHABLE();
4467 }
4468
4469 fail:
4470 di->di_dict = NULL;
4471 Py_DECREF(d);
4472 return NULL;
4473 }
4474
4475 PyTypeObject PyDictRevIterKey_Type = {
4476 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4477 "dict_reversekeyiterator",
4478 sizeof(dictiterobject),
4479 .tp_dealloc = (destructor)dictiter_dealloc,
4480 .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
4481 .tp_traverse = (traverseproc)dictiter_traverse,
4482 .tp_iter = PyObject_SelfIter,
4483 .tp_iternext = (iternextfunc)dictreviter_iternext,
4484 .tp_methods = dictiter_methods
4485 };
4486
4487
4488 /*[clinic input]
4489 dict.__reversed__
4490
4491 Return a reverse iterator over the dict keys.
4492 [clinic start generated code]*/
4493
4494 static PyObject *
dict___reversed___impl(PyDictObject * self)4495 dict___reversed___impl(PyDictObject *self)
4496 /*[clinic end generated code: output=e674483336d1ed51 input=23210ef3477d8c4d]*/
4497 {
4498 assert (PyDict_Check(self));
4499 return dictiter_new(self, &PyDictRevIterKey_Type);
4500 }
4501
4502 static PyObject *
dictiter_reduce(dictiterobject * di,PyObject * Py_UNUSED (ignored))4503 dictiter_reduce(dictiterobject *di, PyObject *Py_UNUSED(ignored))
4504 {
4505 /* copy the iterator state */
4506 dictiterobject tmp = *di;
4507 Py_XINCREF(tmp.di_dict);
4508
4509 PyObject *list = PySequence_List((PyObject*)&tmp);
4510 Py_XDECREF(tmp.di_dict);
4511 if (list == NULL) {
4512 return NULL;
4513 }
4514 return Py_BuildValue("N(N)", _PyEval_GetBuiltin(&_Py_ID(iter)), list);
4515 }
4516
4517 PyTypeObject PyDictRevIterItem_Type = {
4518 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4519 "dict_reverseitemiterator",
4520 sizeof(dictiterobject),
4521 .tp_dealloc = (destructor)dictiter_dealloc,
4522 .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
4523 .tp_traverse = (traverseproc)dictiter_traverse,
4524 .tp_iter = PyObject_SelfIter,
4525 .tp_iternext = (iternextfunc)dictreviter_iternext,
4526 .tp_methods = dictiter_methods
4527 };
4528
4529 PyTypeObject PyDictRevIterValue_Type = {
4530 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4531 "dict_reversevalueiterator",
4532 sizeof(dictiterobject),
4533 .tp_dealloc = (destructor)dictiter_dealloc,
4534 .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
4535 .tp_traverse = (traverseproc)dictiter_traverse,
4536 .tp_iter = PyObject_SelfIter,
4537 .tp_iternext = (iternextfunc)dictreviter_iternext,
4538 .tp_methods = dictiter_methods
4539 };
4540
4541 /***********************************************/
4542 /* View objects for keys(), items(), values(). */
4543 /***********************************************/
4544
4545 /* The instance lay-out is the same for all three; but the type differs. */
4546
4547 static void
dictview_dealloc(_PyDictViewObject * dv)4548 dictview_dealloc(_PyDictViewObject *dv)
4549 {
4550 /* bpo-31095: UnTrack is needed before calling any callbacks */
4551 _PyObject_GC_UNTRACK(dv);
4552 Py_XDECREF(dv->dv_dict);
4553 PyObject_GC_Del(dv);
4554 }
4555
4556 static int
dictview_traverse(_PyDictViewObject * dv,visitproc visit,void * arg)4557 dictview_traverse(_PyDictViewObject *dv, visitproc visit, void *arg)
4558 {
4559 Py_VISIT(dv->dv_dict);
4560 return 0;
4561 }
4562
4563 static Py_ssize_t
dictview_len(_PyDictViewObject * dv)4564 dictview_len(_PyDictViewObject *dv)
4565 {
4566 Py_ssize_t len = 0;
4567 if (dv->dv_dict != NULL)
4568 len = dv->dv_dict->ma_used;
4569 return len;
4570 }
4571
4572 PyObject *
_PyDictView_New(PyObject * dict,PyTypeObject * type)4573 _PyDictView_New(PyObject *dict, PyTypeObject *type)
4574 {
4575 _PyDictViewObject *dv;
4576 if (dict == NULL) {
4577 PyErr_BadInternalCall();
4578 return NULL;
4579 }
4580 if (!PyDict_Check(dict)) {
4581 /* XXX Get rid of this restriction later */
4582 PyErr_Format(PyExc_TypeError,
4583 "%s() requires a dict argument, not '%s'",
4584 type->tp_name, Py_TYPE(dict)->tp_name);
4585 return NULL;
4586 }
4587 dv = PyObject_GC_New(_PyDictViewObject, type);
4588 if (dv == NULL)
4589 return NULL;
4590 Py_INCREF(dict);
4591 dv->dv_dict = (PyDictObject *)dict;
4592 _PyObject_GC_TRACK(dv);
4593 return (PyObject *)dv;
4594 }
4595
4596 static PyObject *
dictview_mapping(PyObject * view,void * Py_UNUSED (ignored))4597 dictview_mapping(PyObject *view, void *Py_UNUSED(ignored)) {
4598 assert(view != NULL);
4599 assert(PyDictKeys_Check(view)
4600 || PyDictValues_Check(view)
4601 || PyDictItems_Check(view));
4602 PyObject *mapping = (PyObject *)((_PyDictViewObject *)view)->dv_dict;
4603 return PyDictProxy_New(mapping);
4604 }
4605
4606 static PyGetSetDef dictview_getset[] = {
4607 {"mapping", dictview_mapping, (setter)NULL,
4608 "dictionary that this view refers to", NULL},
4609 {0}
4610 };
4611
4612 /* TODO(guido): The views objects are not complete:
4613
4614 * support more set operations
4615 * support arbitrary mappings?
4616 - either these should be static or exported in dictobject.h
4617 - if public then they should probably be in builtins
4618 */
4619
4620 /* Return 1 if self is a subset of other, iterating over self;
4621 0 if not; -1 if an error occurred. */
4622 static int
all_contained_in(PyObject * self,PyObject * other)4623 all_contained_in(PyObject *self, PyObject *other)
4624 {
4625 PyObject *iter = PyObject_GetIter(self);
4626 int ok = 1;
4627
4628 if (iter == NULL)
4629 return -1;
4630 for (;;) {
4631 PyObject *next = PyIter_Next(iter);
4632 if (next == NULL) {
4633 if (PyErr_Occurred())
4634 ok = -1;
4635 break;
4636 }
4637 ok = PySequence_Contains(other, next);
4638 Py_DECREF(next);
4639 if (ok <= 0)
4640 break;
4641 }
4642 Py_DECREF(iter);
4643 return ok;
4644 }
4645
4646 static PyObject *
dictview_richcompare(PyObject * self,PyObject * other,int op)4647 dictview_richcompare(PyObject *self, PyObject *other, int op)
4648 {
4649 Py_ssize_t len_self, len_other;
4650 int ok;
4651 PyObject *result;
4652
4653 assert(self != NULL);
4654 assert(PyDictViewSet_Check(self));
4655 assert(other != NULL);
4656
4657 if (!PyAnySet_Check(other) && !PyDictViewSet_Check(other))
4658 Py_RETURN_NOTIMPLEMENTED;
4659
4660 len_self = PyObject_Size(self);
4661 if (len_self < 0)
4662 return NULL;
4663 len_other = PyObject_Size(other);
4664 if (len_other < 0)
4665 return NULL;
4666
4667 ok = 0;
4668 switch(op) {
4669
4670 case Py_NE:
4671 case Py_EQ:
4672 if (len_self == len_other)
4673 ok = all_contained_in(self, other);
4674 if (op == Py_NE && ok >= 0)
4675 ok = !ok;
4676 break;
4677
4678 case Py_LT:
4679 if (len_self < len_other)
4680 ok = all_contained_in(self, other);
4681 break;
4682
4683 case Py_LE:
4684 if (len_self <= len_other)
4685 ok = all_contained_in(self, other);
4686 break;
4687
4688 case Py_GT:
4689 if (len_self > len_other)
4690 ok = all_contained_in(other, self);
4691 break;
4692
4693 case Py_GE:
4694 if (len_self >= len_other)
4695 ok = all_contained_in(other, self);
4696 break;
4697
4698 }
4699 if (ok < 0)
4700 return NULL;
4701 result = ok ? Py_True : Py_False;
4702 Py_INCREF(result);
4703 return result;
4704 }
4705
4706 static PyObject *
dictview_repr(_PyDictViewObject * dv)4707 dictview_repr(_PyDictViewObject *dv)
4708 {
4709 PyObject *seq;
4710 PyObject *result = NULL;
4711 Py_ssize_t rc;
4712
4713 rc = Py_ReprEnter((PyObject *)dv);
4714 if (rc != 0) {
4715 return rc > 0 ? PyUnicode_FromString("...") : NULL;
4716 }
4717 seq = PySequence_List((PyObject *)dv);
4718 if (seq == NULL) {
4719 goto Done;
4720 }
4721 result = PyUnicode_FromFormat("%s(%R)", Py_TYPE(dv)->tp_name, seq);
4722 Py_DECREF(seq);
4723
4724 Done:
4725 Py_ReprLeave((PyObject *)dv);
4726 return result;
4727 }
4728
4729 /*** dict_keys ***/
4730
4731 static PyObject *
dictkeys_iter(_PyDictViewObject * dv)4732 dictkeys_iter(_PyDictViewObject *dv)
4733 {
4734 if (dv->dv_dict == NULL) {
4735 Py_RETURN_NONE;
4736 }
4737 return dictiter_new(dv->dv_dict, &PyDictIterKey_Type);
4738 }
4739
4740 static int
dictkeys_contains(_PyDictViewObject * dv,PyObject * obj)4741 dictkeys_contains(_PyDictViewObject *dv, PyObject *obj)
4742 {
4743 if (dv->dv_dict == NULL)
4744 return 0;
4745 return PyDict_Contains((PyObject *)dv->dv_dict, obj);
4746 }
4747
4748 static PySequenceMethods dictkeys_as_sequence = {
4749 (lenfunc)dictview_len, /* sq_length */
4750 0, /* sq_concat */
4751 0, /* sq_repeat */
4752 0, /* sq_item */
4753 0, /* sq_slice */
4754 0, /* sq_ass_item */
4755 0, /* sq_ass_slice */
4756 (objobjproc)dictkeys_contains, /* sq_contains */
4757 };
4758
4759 // Create an set object from dictviews object.
4760 // Returns a new reference.
4761 // This utility function is used by set operations.
4762 static PyObject*
dictviews_to_set(PyObject * self)4763 dictviews_to_set(PyObject *self)
4764 {
4765 PyObject *left = self;
4766 if (PyDictKeys_Check(self)) {
4767 // PySet_New() has fast path for the dict object.
4768 PyObject *dict = (PyObject *)((_PyDictViewObject *)self)->dv_dict;
4769 if (PyDict_CheckExact(dict)) {
4770 left = dict;
4771 }
4772 }
4773 return PySet_New(left);
4774 }
4775
4776 static PyObject*
dictviews_sub(PyObject * self,PyObject * other)4777 dictviews_sub(PyObject *self, PyObject *other)
4778 {
4779 PyObject *result = dictviews_to_set(self);
4780 if (result == NULL) {
4781 return NULL;
4782 }
4783
4784 PyObject *tmp = PyObject_CallMethodOneArg(
4785 result, &_Py_ID(difference_update), other);
4786 if (tmp == NULL) {
4787 Py_DECREF(result);
4788 return NULL;
4789 }
4790
4791 Py_DECREF(tmp);
4792 return result;
4793 }
4794
4795 static int
4796 dictitems_contains(_PyDictViewObject *dv, PyObject *obj);
4797
4798 PyObject *
_PyDictView_Intersect(PyObject * self,PyObject * other)4799 _PyDictView_Intersect(PyObject* self, PyObject *other)
4800 {
4801 PyObject *result;
4802 PyObject *it;
4803 PyObject *key;
4804 Py_ssize_t len_self;
4805 int rv;
4806 int (*dict_contains)(_PyDictViewObject *, PyObject *);
4807
4808 /* Python interpreter swaps parameters when dict view
4809 is on right side of & */
4810 if (!PyDictViewSet_Check(self)) {
4811 PyObject *tmp = other;
4812 other = self;
4813 self = tmp;
4814 }
4815
4816 len_self = dictview_len((_PyDictViewObject *)self);
4817
4818 /* if other is a set and self is smaller than other,
4819 reuse set intersection logic */
4820 if (PySet_CheckExact(other) && len_self <= PyObject_Size(other)) {
4821 return PyObject_CallMethodObjArgs(
4822 other, &_Py_ID(intersection), self, NULL);
4823 }
4824
4825 /* if other is another dict view, and it is bigger than self,
4826 swap them */
4827 if (PyDictViewSet_Check(other)) {
4828 Py_ssize_t len_other = dictview_len((_PyDictViewObject *)other);
4829 if (len_other > len_self) {
4830 PyObject *tmp = other;
4831 other = self;
4832 self = tmp;
4833 }
4834 }
4835
4836 /* at this point, two things should be true
4837 1. self is a dictview
4838 2. if other is a dictview then it is smaller than self */
4839 result = PySet_New(NULL);
4840 if (result == NULL)
4841 return NULL;
4842
4843 it = PyObject_GetIter(other);
4844 if (it == NULL) {
4845 Py_DECREF(result);
4846 return NULL;
4847 }
4848
4849 if (PyDictKeys_Check(self)) {
4850 dict_contains = dictkeys_contains;
4851 }
4852 /* else PyDictItems_Check(self) */
4853 else {
4854 dict_contains = dictitems_contains;
4855 }
4856
4857 while ((key = PyIter_Next(it)) != NULL) {
4858 rv = dict_contains((_PyDictViewObject *)self, key);
4859 if (rv < 0) {
4860 goto error;
4861 }
4862 if (rv) {
4863 if (PySet_Add(result, key)) {
4864 goto error;
4865 }
4866 }
4867 Py_DECREF(key);
4868 }
4869 Py_DECREF(it);
4870 if (PyErr_Occurred()) {
4871 Py_DECREF(result);
4872 return NULL;
4873 }
4874 return result;
4875
4876 error:
4877 Py_DECREF(it);
4878 Py_DECREF(result);
4879 Py_DECREF(key);
4880 return NULL;
4881 }
4882
4883 static PyObject*
dictviews_or(PyObject * self,PyObject * other)4884 dictviews_or(PyObject* self, PyObject *other)
4885 {
4886 PyObject *result = dictviews_to_set(self);
4887 if (result == NULL) {
4888 return NULL;
4889 }
4890
4891 if (_PySet_Update(result, other) < 0) {
4892 Py_DECREF(result);
4893 return NULL;
4894 }
4895 return result;
4896 }
4897
4898 static PyObject *
dictitems_xor(PyObject * self,PyObject * other)4899 dictitems_xor(PyObject *self, PyObject *other)
4900 {
4901 assert(PyDictItems_Check(self));
4902 assert(PyDictItems_Check(other));
4903 PyObject *d1 = (PyObject *)((_PyDictViewObject *)self)->dv_dict;
4904 PyObject *d2 = (PyObject *)((_PyDictViewObject *)other)->dv_dict;
4905
4906 PyObject *temp_dict = PyDict_Copy(d1);
4907 if (temp_dict == NULL) {
4908 return NULL;
4909 }
4910 PyObject *result_set = PySet_New(NULL);
4911 if (result_set == NULL) {
4912 Py_CLEAR(temp_dict);
4913 return NULL;
4914 }
4915
4916 PyObject *key = NULL, *val1 = NULL, *val2 = NULL;
4917 Py_ssize_t pos = 0;
4918 Py_hash_t hash;
4919
4920 while (_PyDict_Next(d2, &pos, &key, &val2, &hash)) {
4921 Py_INCREF(key);
4922 Py_INCREF(val2);
4923 val1 = _PyDict_GetItem_KnownHash(temp_dict, key, hash);
4924
4925 int to_delete;
4926 if (val1 == NULL) {
4927 if (PyErr_Occurred()) {
4928 goto error;
4929 }
4930 to_delete = 0;
4931 }
4932 else {
4933 Py_INCREF(val1);
4934 to_delete = PyObject_RichCompareBool(val1, val2, Py_EQ);
4935 if (to_delete < 0) {
4936 goto error;
4937 }
4938 }
4939
4940 if (to_delete) {
4941 if (_PyDict_DelItem_KnownHash(temp_dict, key, hash) < 0) {
4942 goto error;
4943 }
4944 }
4945 else {
4946 PyObject *pair = PyTuple_Pack(2, key, val2);
4947 if (pair == NULL) {
4948 goto error;
4949 }
4950 if (PySet_Add(result_set, pair) < 0) {
4951 Py_DECREF(pair);
4952 goto error;
4953 }
4954 Py_DECREF(pair);
4955 }
4956 Py_DECREF(key);
4957 Py_XDECREF(val1);
4958 Py_DECREF(val2);
4959 }
4960 key = val1 = val2 = NULL;
4961
4962 PyObject *remaining_pairs = PyObject_CallMethodNoArgs(
4963 temp_dict, &_Py_ID(items));
4964 if (remaining_pairs == NULL) {
4965 goto error;
4966 }
4967 if (_PySet_Update(result_set, remaining_pairs) < 0) {
4968 Py_DECREF(remaining_pairs);
4969 goto error;
4970 }
4971 Py_DECREF(temp_dict);
4972 Py_DECREF(remaining_pairs);
4973 return result_set;
4974
4975 error:
4976 Py_XDECREF(temp_dict);
4977 Py_XDECREF(result_set);
4978 Py_XDECREF(key);
4979 Py_XDECREF(val1);
4980 Py_XDECREF(val2);
4981 return NULL;
4982 }
4983
4984 static PyObject*
dictviews_xor(PyObject * self,PyObject * other)4985 dictviews_xor(PyObject* self, PyObject *other)
4986 {
4987 if (PyDictItems_Check(self) && PyDictItems_Check(other)) {
4988 return dictitems_xor(self, other);
4989 }
4990 PyObject *result = dictviews_to_set(self);
4991 if (result == NULL) {
4992 return NULL;
4993 }
4994
4995 PyObject *tmp = PyObject_CallMethodOneArg(
4996 result, &_Py_ID(symmetric_difference_update), other);
4997 if (tmp == NULL) {
4998 Py_DECREF(result);
4999 return NULL;
5000 }
5001
5002 Py_DECREF(tmp);
5003 return result;
5004 }
5005
5006 static PyNumberMethods dictviews_as_number = {
5007 0, /*nb_add*/
5008 (binaryfunc)dictviews_sub, /*nb_subtract*/
5009 0, /*nb_multiply*/
5010 0, /*nb_remainder*/
5011 0, /*nb_divmod*/
5012 0, /*nb_power*/
5013 0, /*nb_negative*/
5014 0, /*nb_positive*/
5015 0, /*nb_absolute*/
5016 0, /*nb_bool*/
5017 0, /*nb_invert*/
5018 0, /*nb_lshift*/
5019 0, /*nb_rshift*/
5020 (binaryfunc)_PyDictView_Intersect, /*nb_and*/
5021 (binaryfunc)dictviews_xor, /*nb_xor*/
5022 (binaryfunc)dictviews_or, /*nb_or*/
5023 };
5024
5025 static PyObject*
dictviews_isdisjoint(PyObject * self,PyObject * other)5026 dictviews_isdisjoint(PyObject *self, PyObject *other)
5027 {
5028 PyObject *it;
5029 PyObject *item = NULL;
5030
5031 if (self == other) {
5032 if (dictview_len((_PyDictViewObject *)self) == 0)
5033 Py_RETURN_TRUE;
5034 else
5035 Py_RETURN_FALSE;
5036 }
5037
5038 /* Iterate over the shorter object (only if other is a set,
5039 * because PySequence_Contains may be expensive otherwise): */
5040 if (PyAnySet_Check(other) || PyDictViewSet_Check(other)) {
5041 Py_ssize_t len_self = dictview_len((_PyDictViewObject *)self);
5042 Py_ssize_t len_other = PyObject_Size(other);
5043 if (len_other == -1)
5044 return NULL;
5045
5046 if ((len_other > len_self)) {
5047 PyObject *tmp = other;
5048 other = self;
5049 self = tmp;
5050 }
5051 }
5052
5053 it = PyObject_GetIter(other);
5054 if (it == NULL)
5055 return NULL;
5056
5057 while ((item = PyIter_Next(it)) != NULL) {
5058 int contains = PySequence_Contains(self, item);
5059 Py_DECREF(item);
5060 if (contains == -1) {
5061 Py_DECREF(it);
5062 return NULL;
5063 }
5064
5065 if (contains) {
5066 Py_DECREF(it);
5067 Py_RETURN_FALSE;
5068 }
5069 }
5070 Py_DECREF(it);
5071 if (PyErr_Occurred())
5072 return NULL; /* PyIter_Next raised an exception. */
5073 Py_RETURN_TRUE;
5074 }
5075
5076 PyDoc_STRVAR(isdisjoint_doc,
5077 "Return True if the view and the given iterable have a null intersection.");
5078
5079 static PyObject* dictkeys_reversed(_PyDictViewObject *dv, PyObject *Py_UNUSED(ignored));
5080
5081 PyDoc_STRVAR(reversed_keys_doc,
5082 "Return a reverse iterator over the dict keys.");
5083
5084 static PyMethodDef dictkeys_methods[] = {
5085 {"isdisjoint", (PyCFunction)dictviews_isdisjoint, METH_O,
5086 isdisjoint_doc},
5087 {"__reversed__", _PyCFunction_CAST(dictkeys_reversed), METH_NOARGS,
5088 reversed_keys_doc},
5089 {NULL, NULL} /* sentinel */
5090 };
5091
5092 PyTypeObject PyDictKeys_Type = {
5093 PyVarObject_HEAD_INIT(&PyType_Type, 0)
5094 "dict_keys", /* tp_name */
5095 sizeof(_PyDictViewObject), /* tp_basicsize */
5096 0, /* tp_itemsize */
5097 /* methods */
5098 (destructor)dictview_dealloc, /* tp_dealloc */
5099 0, /* tp_vectorcall_offset */
5100 0, /* tp_getattr */
5101 0, /* tp_setattr */
5102 0, /* tp_as_async */
5103 (reprfunc)dictview_repr, /* tp_repr */
5104 &dictviews_as_number, /* tp_as_number */
5105 &dictkeys_as_sequence, /* tp_as_sequence */
5106 0, /* tp_as_mapping */
5107 0, /* tp_hash */
5108 0, /* tp_call */
5109 0, /* tp_str */
5110 PyObject_GenericGetAttr, /* tp_getattro */
5111 0, /* tp_setattro */
5112 0, /* tp_as_buffer */
5113 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
5114 0, /* tp_doc */
5115 (traverseproc)dictview_traverse, /* tp_traverse */
5116 0, /* tp_clear */
5117 dictview_richcompare, /* tp_richcompare */
5118 0, /* tp_weaklistoffset */
5119 (getiterfunc)dictkeys_iter, /* tp_iter */
5120 0, /* tp_iternext */
5121 dictkeys_methods, /* tp_methods */
5122 .tp_getset = dictview_getset,
5123 };
5124
5125 static PyObject *
dictkeys_new(PyObject * dict,PyObject * Py_UNUSED (ignored))5126 dictkeys_new(PyObject *dict, PyObject *Py_UNUSED(ignored))
5127 {
5128 return _PyDictView_New(dict, &PyDictKeys_Type);
5129 }
5130
5131 static PyObject *
dictkeys_reversed(_PyDictViewObject * dv,PyObject * Py_UNUSED (ignored))5132 dictkeys_reversed(_PyDictViewObject *dv, PyObject *Py_UNUSED(ignored))
5133 {
5134 if (dv->dv_dict == NULL) {
5135 Py_RETURN_NONE;
5136 }
5137 return dictiter_new(dv->dv_dict, &PyDictRevIterKey_Type);
5138 }
5139
5140 /*** dict_items ***/
5141
5142 static PyObject *
dictitems_iter(_PyDictViewObject * dv)5143 dictitems_iter(_PyDictViewObject *dv)
5144 {
5145 if (dv->dv_dict == NULL) {
5146 Py_RETURN_NONE;
5147 }
5148 return dictiter_new(dv->dv_dict, &PyDictIterItem_Type);
5149 }
5150
5151 static int
dictitems_contains(_PyDictViewObject * dv,PyObject * obj)5152 dictitems_contains(_PyDictViewObject *dv, PyObject *obj)
5153 {
5154 int result;
5155 PyObject *key, *value, *found;
5156 if (dv->dv_dict == NULL)
5157 return 0;
5158 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 2)
5159 return 0;
5160 key = PyTuple_GET_ITEM(obj, 0);
5161 value = PyTuple_GET_ITEM(obj, 1);
5162 found = PyDict_GetItemWithError((PyObject *)dv->dv_dict, key);
5163 if (found == NULL) {
5164 if (PyErr_Occurred())
5165 return -1;
5166 return 0;
5167 }
5168 Py_INCREF(found);
5169 result = PyObject_RichCompareBool(found, value, Py_EQ);
5170 Py_DECREF(found);
5171 return result;
5172 }
5173
5174 static PySequenceMethods dictitems_as_sequence = {
5175 (lenfunc)dictview_len, /* sq_length */
5176 0, /* sq_concat */
5177 0, /* sq_repeat */
5178 0, /* sq_item */
5179 0, /* sq_slice */
5180 0, /* sq_ass_item */
5181 0, /* sq_ass_slice */
5182 (objobjproc)dictitems_contains, /* sq_contains */
5183 };
5184
5185 static PyObject* dictitems_reversed(_PyDictViewObject *dv, PyObject *Py_UNUSED(ignored));
5186
5187 PyDoc_STRVAR(reversed_items_doc,
5188 "Return a reverse iterator over the dict items.");
5189
5190 static PyMethodDef dictitems_methods[] = {
5191 {"isdisjoint", (PyCFunction)dictviews_isdisjoint, METH_O,
5192 isdisjoint_doc},
5193 {"__reversed__", (PyCFunction)dictitems_reversed, METH_NOARGS,
5194 reversed_items_doc},
5195 {NULL, NULL} /* sentinel */
5196 };
5197
5198 PyTypeObject PyDictItems_Type = {
5199 PyVarObject_HEAD_INIT(&PyType_Type, 0)
5200 "dict_items", /* tp_name */
5201 sizeof(_PyDictViewObject), /* tp_basicsize */
5202 0, /* tp_itemsize */
5203 /* methods */
5204 (destructor)dictview_dealloc, /* tp_dealloc */
5205 0, /* tp_vectorcall_offset */
5206 0, /* tp_getattr */
5207 0, /* tp_setattr */
5208 0, /* tp_as_async */
5209 (reprfunc)dictview_repr, /* tp_repr */
5210 &dictviews_as_number, /* tp_as_number */
5211 &dictitems_as_sequence, /* tp_as_sequence */
5212 0, /* tp_as_mapping */
5213 0, /* tp_hash */
5214 0, /* tp_call */
5215 0, /* tp_str */
5216 PyObject_GenericGetAttr, /* tp_getattro */
5217 0, /* tp_setattro */
5218 0, /* tp_as_buffer */
5219 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
5220 0, /* tp_doc */
5221 (traverseproc)dictview_traverse, /* tp_traverse */
5222 0, /* tp_clear */
5223 dictview_richcompare, /* tp_richcompare */
5224 0, /* tp_weaklistoffset */
5225 (getiterfunc)dictitems_iter, /* tp_iter */
5226 0, /* tp_iternext */
5227 dictitems_methods, /* tp_methods */
5228 .tp_getset = dictview_getset,
5229 };
5230
5231 static PyObject *
dictitems_new(PyObject * dict,PyObject * Py_UNUSED (ignored))5232 dictitems_new(PyObject *dict, PyObject *Py_UNUSED(ignored))
5233 {
5234 return _PyDictView_New(dict, &PyDictItems_Type);
5235 }
5236
5237 static PyObject *
dictitems_reversed(_PyDictViewObject * dv,PyObject * Py_UNUSED (ignored))5238 dictitems_reversed(_PyDictViewObject *dv, PyObject *Py_UNUSED(ignored))
5239 {
5240 if (dv->dv_dict == NULL) {
5241 Py_RETURN_NONE;
5242 }
5243 return dictiter_new(dv->dv_dict, &PyDictRevIterItem_Type);
5244 }
5245
5246 /*** dict_values ***/
5247
5248 static PyObject *
dictvalues_iter(_PyDictViewObject * dv)5249 dictvalues_iter(_PyDictViewObject *dv)
5250 {
5251 if (dv->dv_dict == NULL) {
5252 Py_RETURN_NONE;
5253 }
5254 return dictiter_new(dv->dv_dict, &PyDictIterValue_Type);
5255 }
5256
5257 static PySequenceMethods dictvalues_as_sequence = {
5258 (lenfunc)dictview_len, /* sq_length */
5259 0, /* sq_concat */
5260 0, /* sq_repeat */
5261 0, /* sq_item */
5262 0, /* sq_slice */
5263 0, /* sq_ass_item */
5264 0, /* sq_ass_slice */
5265 (objobjproc)0, /* sq_contains */
5266 };
5267
5268 static PyObject* dictvalues_reversed(_PyDictViewObject *dv, PyObject *Py_UNUSED(ignored));
5269
5270 PyDoc_STRVAR(reversed_values_doc,
5271 "Return a reverse iterator over the dict values.");
5272
5273 static PyMethodDef dictvalues_methods[] = {
5274 {"__reversed__", (PyCFunction)dictvalues_reversed, METH_NOARGS,
5275 reversed_values_doc},
5276 {NULL, NULL} /* sentinel */
5277 };
5278
5279 PyTypeObject PyDictValues_Type = {
5280 PyVarObject_HEAD_INIT(&PyType_Type, 0)
5281 "dict_values", /* tp_name */
5282 sizeof(_PyDictViewObject), /* tp_basicsize */
5283 0, /* tp_itemsize */
5284 /* methods */
5285 (destructor)dictview_dealloc, /* tp_dealloc */
5286 0, /* tp_vectorcall_offset */
5287 0, /* tp_getattr */
5288 0, /* tp_setattr */
5289 0, /* tp_as_async */
5290 (reprfunc)dictview_repr, /* tp_repr */
5291 0, /* tp_as_number */
5292 &dictvalues_as_sequence, /* tp_as_sequence */
5293 0, /* tp_as_mapping */
5294 0, /* tp_hash */
5295 0, /* tp_call */
5296 0, /* tp_str */
5297 PyObject_GenericGetAttr, /* tp_getattro */
5298 0, /* tp_setattro */
5299 0, /* tp_as_buffer */
5300 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
5301 0, /* tp_doc */
5302 (traverseproc)dictview_traverse, /* tp_traverse */
5303 0, /* tp_clear */
5304 0, /* tp_richcompare */
5305 0, /* tp_weaklistoffset */
5306 (getiterfunc)dictvalues_iter, /* tp_iter */
5307 0, /* tp_iternext */
5308 dictvalues_methods, /* tp_methods */
5309 .tp_getset = dictview_getset,
5310 };
5311
5312 static PyObject *
dictvalues_new(PyObject * dict,PyObject * Py_UNUSED (ignored))5313 dictvalues_new(PyObject *dict, PyObject *Py_UNUSED(ignored))
5314 {
5315 return _PyDictView_New(dict, &PyDictValues_Type);
5316 }
5317
5318 static PyObject *
dictvalues_reversed(_PyDictViewObject * dv,PyObject * Py_UNUSED (ignored))5319 dictvalues_reversed(_PyDictViewObject *dv, PyObject *Py_UNUSED(ignored))
5320 {
5321 if (dv->dv_dict == NULL) {
5322 Py_RETURN_NONE;
5323 }
5324 return dictiter_new(dv->dv_dict, &PyDictRevIterValue_Type);
5325 }
5326
5327
5328 /* Returns NULL if cannot allocate a new PyDictKeysObject,
5329 but does not set an error */
5330 PyDictKeysObject *
_PyDict_NewKeysForClass(void)5331 _PyDict_NewKeysForClass(void)
5332 {
5333 PyDictKeysObject *keys = new_keys_object(NEXT_LOG2_SHARED_KEYS_MAX_SIZE, 1);
5334 if (keys == NULL) {
5335 PyErr_Clear();
5336 }
5337 else {
5338 assert(keys->dk_nentries == 0);
5339 /* Set to max size+1 as it will shrink by one before each new object */
5340 keys->dk_usable = SHARED_KEYS_MAX_SIZE;
5341 keys->dk_kind = DICT_KEYS_SPLIT;
5342 }
5343 return keys;
5344 }
5345
5346 #define CACHED_KEYS(tp) (((PyHeapTypeObject*)tp)->ht_cached_keys)
5347
5348 static int
init_inline_values(PyObject * obj,PyTypeObject * tp)5349 init_inline_values(PyObject *obj, PyTypeObject *tp)
5350 {
5351 assert(tp->tp_flags & Py_TPFLAGS_HEAPTYPE);
5352 // assert(type->tp_dictoffset > 0); -- TO DO Update this assert.
5353 assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT);
5354 PyDictKeysObject *keys = CACHED_KEYS(tp);
5355 assert(keys != NULL);
5356 if (keys->dk_usable > 1) {
5357 keys->dk_usable--;
5358 }
5359 Py_ssize_t size = shared_keys_usable_size(keys);
5360 assert(size > 0);
5361 PyDictValues *values = new_values(size);
5362 if (values == NULL) {
5363 PyErr_NoMemory();
5364 return -1;
5365 }
5366 assert(((uint8_t *)values)[-1] >= size+2);
5367 ((uint8_t *)values)[-2] = 0;
5368 for (int i = 0; i < size; i++) {
5369 values->values[i] = NULL;
5370 }
5371 *_PyObject_ValuesPointer(obj) = values;
5372 return 0;
5373 }
5374
5375 int
_PyObject_InitializeDict(PyObject * obj)5376 _PyObject_InitializeDict(PyObject *obj)
5377 {
5378 PyTypeObject *tp = Py_TYPE(obj);
5379 if (tp->tp_dictoffset == 0) {
5380 return 0;
5381 }
5382 if (tp->tp_flags & Py_TPFLAGS_MANAGED_DICT) {
5383 OBJECT_STAT_INC(new_values);
5384 return init_inline_values(obj, tp);
5385 }
5386 PyObject *dict;
5387 if (_PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE) && CACHED_KEYS(tp)) {
5388 dictkeys_incref(CACHED_KEYS(tp));
5389 dict = new_dict_with_shared_keys(CACHED_KEYS(tp));
5390 }
5391 else {
5392 dict = PyDict_New();
5393 }
5394 if (dict == NULL) {
5395 return -1;
5396 }
5397 PyObject **dictptr = _PyObject_DictPointer(obj);
5398 *dictptr = dict;
5399 return 0;
5400 }
5401
5402 static PyObject *
make_dict_from_instance_attributes(PyDictKeysObject * keys,PyDictValues * values)5403 make_dict_from_instance_attributes(PyDictKeysObject *keys, PyDictValues *values)
5404 {
5405 dictkeys_incref(keys);
5406 Py_ssize_t used = 0;
5407 Py_ssize_t track = 0;
5408 for (Py_ssize_t i = 0; i < shared_keys_usable_size(keys); i++) {
5409 PyObject *val = values->values[i];
5410 if (val != NULL) {
5411 used += 1;
5412 track += _PyObject_GC_MAY_BE_TRACKED(val);
5413 }
5414 }
5415 PyObject *res = new_dict(keys, values, used, 0);
5416 if (track && res) {
5417 _PyObject_GC_TRACK(res);
5418 }
5419 return res;
5420 }
5421
5422 PyObject *
_PyObject_MakeDictFromInstanceAttributes(PyObject * obj,PyDictValues * values)5423 _PyObject_MakeDictFromInstanceAttributes(PyObject *obj, PyDictValues *values)
5424 {
5425 assert(Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
5426 PyDictKeysObject *keys = CACHED_KEYS(Py_TYPE(obj));
5427 OBJECT_STAT_INC(dict_materialized_on_request);
5428 return make_dict_from_instance_attributes(keys, values);
5429 }
5430
5431 int
_PyObject_StoreInstanceAttribute(PyObject * obj,PyDictValues * values,PyObject * name,PyObject * value)5432 _PyObject_StoreInstanceAttribute(PyObject *obj, PyDictValues *values,
5433 PyObject *name, PyObject *value)
5434 {
5435 PyDictKeysObject *keys = CACHED_KEYS(Py_TYPE(obj));
5436 assert(keys != NULL);
5437 assert(values != NULL);
5438 assert(Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
5439 Py_ssize_t ix = DKIX_EMPTY;
5440 if (PyUnicode_CheckExact(name)) {
5441 ix = insert_into_dictkeys(keys, name);
5442 }
5443 if (ix == DKIX_EMPTY) {
5444 #ifdef Py_STATS
5445 if (PyUnicode_CheckExact(name)) {
5446 if (shared_keys_usable_size(keys) == SHARED_KEYS_MAX_SIZE) {
5447 OBJECT_STAT_INC(dict_materialized_too_big);
5448 }
5449 else {
5450 OBJECT_STAT_INC(dict_materialized_new_key);
5451 }
5452 }
5453 else {
5454 OBJECT_STAT_INC(dict_materialized_str_subclass);
5455 }
5456 #endif
5457 PyObject *dict = make_dict_from_instance_attributes(keys, values);
5458 if (dict == NULL) {
5459 return -1;
5460 }
5461 *_PyObject_ValuesPointer(obj) = NULL;
5462 *_PyObject_ManagedDictPointer(obj) = dict;
5463 if (value == NULL) {
5464 return PyDict_DelItem(dict, name);
5465 }
5466 else {
5467 return PyDict_SetItem(dict, name, value);
5468 }
5469 }
5470 PyObject *old_value = values->values[ix];
5471 Py_XINCREF(value);
5472 values->values[ix] = value;
5473 if (old_value == NULL) {
5474 if (value == NULL) {
5475 PyErr_Format(PyExc_AttributeError,
5476 "'%.100s' object has no attribute '%U'",
5477 Py_TYPE(obj)->tp_name, name);
5478 return -1;
5479 }
5480 _PyDictValues_AddToInsertionOrder(values, ix);
5481 }
5482 else {
5483 if (value == NULL) {
5484 delete_index_from_values(values, ix);
5485 }
5486 Py_DECREF(old_value);
5487 }
5488 return 0;
5489 }
5490
5491 PyObject *
_PyObject_GetInstanceAttribute(PyObject * obj,PyDictValues * values,PyObject * name)5492 _PyObject_GetInstanceAttribute(PyObject *obj, PyDictValues *values,
5493 PyObject *name)
5494 {
5495 assert(PyUnicode_CheckExact(name));
5496 PyDictKeysObject *keys = CACHED_KEYS(Py_TYPE(obj));
5497 assert(keys != NULL);
5498 Py_ssize_t ix = _PyDictKeys_StringLookup(keys, name);
5499 if (ix == DKIX_EMPTY) {
5500 return NULL;
5501 }
5502 PyObject *value = values->values[ix];
5503 Py_XINCREF(value);
5504 return value;
5505 }
5506
5507 int
_PyObject_IsInstanceDictEmpty(PyObject * obj)5508 _PyObject_IsInstanceDictEmpty(PyObject *obj)
5509 {
5510 PyTypeObject *tp = Py_TYPE(obj);
5511 if (tp->tp_dictoffset == 0) {
5512 return 1;
5513 }
5514 PyObject **dictptr;
5515 if (tp->tp_flags & Py_TPFLAGS_MANAGED_DICT) {
5516 PyDictValues *values = *_PyObject_ValuesPointer(obj);
5517 if (values) {
5518 PyDictKeysObject *keys = CACHED_KEYS(tp);
5519 for (Py_ssize_t i = 0; i < keys->dk_nentries; i++) {
5520 if (values->values[i] != NULL) {
5521 return 0;
5522 }
5523 }
5524 return 1;
5525 }
5526 dictptr = _PyObject_ManagedDictPointer(obj);
5527 }
5528 else {
5529 dictptr = _PyObject_DictPointer(obj);
5530 }
5531 PyObject *dict = *dictptr;
5532 if (dict == NULL) {
5533 return 1;
5534 }
5535 return ((PyDictObject *)dict)->ma_used == 0;
5536 }
5537
5538
5539 int
_PyObject_VisitInstanceAttributes(PyObject * self,visitproc visit,void * arg)5540 _PyObject_VisitInstanceAttributes(PyObject *self, visitproc visit, void *arg)
5541 {
5542 PyTypeObject *tp = Py_TYPE(self);
5543 assert(Py_TYPE(self)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
5544 PyDictValues **values_ptr = _PyObject_ValuesPointer(self);
5545 if (*values_ptr == NULL) {
5546 return 0;
5547 }
5548 PyDictKeysObject *keys = CACHED_KEYS(tp);
5549 for (Py_ssize_t i = 0; i < keys->dk_nentries; i++) {
5550 Py_VISIT((*values_ptr)->values[i]);
5551 }
5552 return 0;
5553 }
5554
5555 void
_PyObject_ClearInstanceAttributes(PyObject * self)5556 _PyObject_ClearInstanceAttributes(PyObject *self)
5557 {
5558 PyTypeObject *tp = Py_TYPE(self);
5559 assert(Py_TYPE(self)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
5560 PyDictValues **values_ptr = _PyObject_ValuesPointer(self);
5561 if (*values_ptr == NULL) {
5562 return;
5563 }
5564 PyDictKeysObject *keys = CACHED_KEYS(tp);
5565 for (Py_ssize_t i = 0; i < keys->dk_nentries; i++) {
5566 Py_CLEAR((*values_ptr)->values[i]);
5567 }
5568 }
5569
5570 void
_PyObject_FreeInstanceAttributes(PyObject * self)5571 _PyObject_FreeInstanceAttributes(PyObject *self)
5572 {
5573 PyTypeObject *tp = Py_TYPE(self);
5574 assert(Py_TYPE(self)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
5575 PyDictValues **values_ptr = _PyObject_ValuesPointer(self);
5576 PyDictValues *values = *values_ptr;
5577 if (values == NULL) {
5578 return;
5579 }
5580 *values_ptr = NULL;
5581 PyDictKeysObject *keys = CACHED_KEYS(tp);
5582 for (Py_ssize_t i = 0; i < keys->dk_nentries; i++) {
5583 Py_XDECREF(values->values[i]);
5584 }
5585 free_values(values);
5586 }
5587
5588 PyObject *
PyObject_GenericGetDict(PyObject * obj,void * context)5589 PyObject_GenericGetDict(PyObject *obj, void *context)
5590 {
5591 PyObject *dict;
5592 PyTypeObject *tp = Py_TYPE(obj);
5593 if (_PyType_HasFeature(tp, Py_TPFLAGS_MANAGED_DICT)) {
5594 PyDictValues **values_ptr = _PyObject_ValuesPointer(obj);
5595 PyObject **dictptr = _PyObject_ManagedDictPointer(obj);
5596 if (*values_ptr) {
5597 assert(*dictptr == NULL);
5598 OBJECT_STAT_INC(dict_materialized_on_request);
5599 *dictptr = dict = make_dict_from_instance_attributes(CACHED_KEYS(tp), *values_ptr);
5600 if (dict != NULL) {
5601 *values_ptr = NULL;
5602 }
5603 }
5604 else if (*dictptr == NULL) {
5605 *dictptr = dict = PyDict_New();
5606 }
5607 else {
5608 dict = *dictptr;
5609 }
5610 }
5611 else {
5612 PyObject **dictptr = _PyObject_DictPointer(obj);
5613 if (dictptr == NULL) {
5614 PyErr_SetString(PyExc_AttributeError,
5615 "This object has no __dict__");
5616 return NULL;
5617 }
5618 dict = *dictptr;
5619 if (dict == NULL) {
5620 PyTypeObject *tp = Py_TYPE(obj);
5621 if (_PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE) && CACHED_KEYS(tp)) {
5622 dictkeys_incref(CACHED_KEYS(tp));
5623 *dictptr = dict = new_dict_with_shared_keys(CACHED_KEYS(tp));
5624 }
5625 else {
5626 *dictptr = dict = PyDict_New();
5627 }
5628 }
5629 }
5630 Py_XINCREF(dict);
5631 return dict;
5632 }
5633
5634 int
_PyObjectDict_SetItem(PyTypeObject * tp,PyObject ** dictptr,PyObject * key,PyObject * value)5635 _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr,
5636 PyObject *key, PyObject *value)
5637 {
5638 PyObject *dict;
5639 int res;
5640 PyDictKeysObject *cached;
5641
5642 assert(dictptr != NULL);
5643 if ((tp->tp_flags & Py_TPFLAGS_HEAPTYPE) && (cached = CACHED_KEYS(tp))) {
5644 assert(dictptr != NULL);
5645 dict = *dictptr;
5646 if (dict == NULL) {
5647 dictkeys_incref(cached);
5648 dict = new_dict_with_shared_keys(cached);
5649 if (dict == NULL)
5650 return -1;
5651 *dictptr = dict;
5652 }
5653 if (value == NULL) {
5654 res = PyDict_DelItem(dict, key);
5655 }
5656 else {
5657 res = PyDict_SetItem(dict, key, value);
5658 }
5659 } else {
5660 dict = *dictptr;
5661 if (dict == NULL) {
5662 dict = PyDict_New();
5663 if (dict == NULL)
5664 return -1;
5665 *dictptr = dict;
5666 }
5667 if (value == NULL) {
5668 res = PyDict_DelItem(dict, key);
5669 } else {
5670 res = PyDict_SetItem(dict, key, value);
5671 }
5672 }
5673 ASSERT_CONSISTENT(dict);
5674 return res;
5675 }
5676
5677 void
_PyDictKeys_DecRef(PyDictKeysObject * keys)5678 _PyDictKeys_DecRef(PyDictKeysObject *keys)
5679 {
5680 dictkeys_decref(keys);
5681 }
5682
5683 static uint32_t next_dict_keys_version = 2;
5684
_PyDictKeys_GetVersionForCurrentState(PyDictKeysObject * dictkeys)5685 uint32_t _PyDictKeys_GetVersionForCurrentState(PyDictKeysObject *dictkeys)
5686 {
5687 if (dictkeys->dk_version != 0) {
5688 return dictkeys->dk_version;
5689 }
5690 if (next_dict_keys_version == 0) {
5691 return 0;
5692 }
5693 uint32_t v = next_dict_keys_version++;
5694 dictkeys->dk_version = v;
5695 return v;
5696 }
5697