xref: /aosp_15_r20/external/skia/src/core/SkTHash.h (revision c8dee2aa9b3f27cf6c858bd81872bdeb2c07ed17)
1 /*
2  * Copyright 2015 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #ifndef SkTHash_DEFINED
9 #define SkTHash_DEFINED
10 
11 #include "include/core/SkTypes.h"
12 #include "src/base/SkMathPriv.h"
13 #include "src/core/SkChecksum.h"
14 
15 #include <initializer_list>
16 #include <memory>
17 #include <new>
18 #include <type_traits>
19 #include <utility>
20 
21 namespace skia_private {
22 
23 // Before trying to use THashTable, look below to see if THashMap or THashSet works for you.
24 // They're easier to use, usually perform the same, and have fewer sharp edges.
25 
26 // T and K are treated as ordinary copyable C++ types.
27 // Traits must have:
28 //   - static K GetKey(T)
29 //   - static uint32_t Hash(K)
30 // If the key is large and stored inside T, you may want to make K a const&.
31 // Similarly, if T is large you might want it to be a pointer.
32 template <typename T, typename K, typename Traits = T>
33 class THashTable {
34 public:
35     THashTable()  = default;
36     ~THashTable() = default;
37 
THashTable(const THashTable & that)38     THashTable(const THashTable&  that) { *this = that; }
THashTable(THashTable && that)39     THashTable(      THashTable&& that) { *this = std::move(that); }
40 
41     THashTable& operator=(const THashTable& that) {
42         if (this != &that) {
43             fCount     = that.fCount;
44             fCapacity  = that.fCapacity;
45             fSlots.reset(new Slot[that.fCapacity]);
46             for (int i = 0; i < fCapacity; i++) {
47                 fSlots[i] = that.fSlots[i];
48             }
49         }
50         return *this;
51     }
52 
53     THashTable& operator=(THashTable&& that) {
54         if (this != &that) {
55             fCount    = that.fCount;
56             fCapacity = that.fCapacity;
57             fSlots    = std::move(that.fSlots);
58 
59             that.fCount = that.fCapacity = 0;
60         }
61         return *this;
62     }
63 
64     // Clear the table.
reset()65     void reset() { *this = THashTable(); }
66 
67     // How many entries are in the table?
count()68     int count() const { return fCount; }
69 
70     // How many slots does the table contain? (Note that unlike an array, hash tables can grow
71     // before reaching 100% capacity.)
capacity()72     int capacity() const { return fCapacity; }
73 
74     // Approximately how many bytes of memory do we use beyond sizeof(*this)?
approxBytesUsed()75     size_t approxBytesUsed() const { return fCapacity * sizeof(Slot); }
76 
77     // Exchange two hash tables.
swap(THashTable & that)78     void swap(THashTable& that) {
79         std::swap(fCount, that.fCount);
80         std::swap(fCapacity, that.fCapacity);
81         std::swap(fSlots, that.fSlots);
82     }
83 
swap(THashTable && that)84     void swap(THashTable&& that) {
85         *this = std::move(that);
86     }
87 
88     // !!!!!!!!!!!!!!!!!                 CAUTION                   !!!!!!!!!!!!!!!!!
89     // set(), find() and foreach() all allow mutable access to table entries.
90     // If you change an entry so that it no longer has the same key, all hell
91     // will break loose.  Do not do that!
92     //
93     // Please prefer to use THashMap or THashSet, which do not have this danger.
94 
95     // The pointers returned by set() and find() are valid only until the next call to set().
96     // The pointers you receive in foreach() are only valid for its duration.
97 
98     // Copy val into the hash table, returning a pointer to the copy now in the table.
99     // If there already is an entry in the table with the same key, we overwrite it.
set(T val)100     T* set(T val) {
101         if (4 * fCount >= 3 * fCapacity) {
102             this->resize(fCapacity > 0 ? fCapacity * 2 : 4);
103         }
104         return this->uncheckedSet(std::move(val));
105     }
106 
107     // If there is an entry in the table with this key, return a pointer to it.  If not, null.
find(const K & key)108     T* find(const K& key) const {
109         uint32_t hash = Hash(key);
110         int index = hash & (fCapacity-1);
111         for (int n = 0; n < fCapacity; n++) {
112             Slot& s = fSlots[index];
113             if (s.empty()) {
114                 return nullptr;
115             }
116             if (hash == s.fHash && key == Traits::GetKey(*s)) {
117                 return &*s;
118             }
119             index = this->next(index);
120         }
121         SkASSERT(fCapacity == fCount);
122         return nullptr;
123     }
124 
125     // If there is an entry in the table with this key, return it.  If not, null.
126     // This only works for pointer type T, and cannot be used to find an nullptr entry.
findOrNull(const K & key)127     T findOrNull(const K& key) const {
128         if (T* p = this->find(key)) {
129             return *p;
130         }
131         return nullptr;
132     }
133 
134     // If a value with this key exists in the hash table, removes it and returns true.
135     // Otherwise, returns false.
removeIfExists(const K & key)136     bool removeIfExists(const K& key) {
137         uint32_t hash = Hash(key);
138         int index = hash & (fCapacity-1);
139         for (int n = 0; n < fCapacity; n++) {
140             Slot& s = fSlots[index];
141             if (s.empty()) {
142                 return false;
143             }
144             if (hash == s.fHash && key == Traits::GetKey(*s)) {
145                 this->removeSlot(index);
146                 if (4 * fCount <= fCapacity && fCapacity > 4) {
147                     this->resize(fCapacity / 2);
148                 }
149                 return true;
150             }
151             index = this->next(index);
152         }
153         SkASSERT(fCapacity == fCount);
154         return false;
155     }
156 
157     // Removes the value with this key from the hash table. Asserts if it is missing.
remove(const K & key)158     void remove(const K& key) {
159         SkAssertResult(this->removeIfExists(key));
160     }
161 
162     // Hash tables will automatically resize themselves when set() and remove() are called, but
163     // resize() can be called to manually grow capacity before a bulk insertion.
resize(int capacity)164     void resize(int capacity) {
165         // We must have enough capacity to hold every key.
166         SkASSERT(capacity >= fCount);
167         // `capacity` must be a power of two, because we use `hash & (capacity-1)` to look up keys
168         // in the table (since this is faster than a modulo).
169         SkASSERT((capacity & (capacity - 1)) == 0);
170 
171         int oldCapacity = fCapacity;
172         SkDEBUGCODE(int oldCount = fCount);
173 
174         fCount = 0;
175         fCapacity = capacity;
176         std::unique_ptr<Slot[]> oldSlots = std::move(fSlots);
177         fSlots.reset(new Slot[capacity]);
178 
179         for (int i = 0; i < oldCapacity; i++) {
180             Slot& s = oldSlots[i];
181             if (s.has_value()) {
182                 this->uncheckedSet(*std::move(s));
183             }
184         }
185         SkASSERT(fCount == oldCount);
186     }
187 
188     // Reserve extra capacity. This only grows capacity; requests to shrink are ignored.
189     // We assume that the passed-in value represents the number of items that the caller wants to
190     // store in the table. The passed-in value is adjusted to honor the following rules:
191     // - Hash tables must have a power-of-two capacity.
192     // - Hash tables grow when they exceed 3/4 capacity, not when they are full.
reserve(int n)193     void reserve(int n) {
194         int newCapacity = SkNextPow2(n);
195         if (n * 4 > newCapacity * 3) {
196             newCapacity *= 2;
197         }
198 
199         if (newCapacity > fCapacity) {
200             this->resize(newCapacity);
201         }
202     }
203 
204     // Call fn on every entry in the table.  You may mutate the entries, but be very careful.
205     template <typename Fn>  // f(T*)
foreach(Fn && fn)206     void foreach(Fn&& fn) {
207         for (int i = 0; i < fCapacity; i++) {
208             if (fSlots[i].has_value()) {
209                 fn(&*fSlots[i]);
210             }
211         }
212     }
213 
214     // Call fn on every entry in the table.  You may not mutate anything.
215     template <typename Fn>  // f(T) or f(const T&)
foreach(Fn && fn)216     void foreach(Fn&& fn) const {
217         for (int i = 0; i < fCapacity; i++) {
218             if (fSlots[i].has_value()) {
219                 fn(*fSlots[i]);
220             }
221         }
222     }
223 
224     // A basic iterator-like class which disallows mutation; sufficient for range-based for loops.
225     // Intended for use by THashMap and THashSet via begin() and end().
226     // Adding or removing elements may invalidate all iterators.
227     template <typename SlotVal>
228     class Iter {
229     public:
230         using TTable = THashTable<T, K, Traits>;
231 
Iter(const TTable * table,int slot)232         Iter(const TTable* table, int slot) : fTable(table), fSlot(slot) {}
233 
MakeBegin(const TTable * table)234         static Iter MakeBegin(const TTable* table) {
235             return Iter{table, table->firstPopulatedSlot()};
236         }
237 
MakeEnd(const TTable * table)238         static Iter MakeEnd(const TTable* table) {
239             return Iter{table, table->capacity()};
240         }
241 
242         const SlotVal& operator*() const {
243             return *fTable->slot(fSlot);
244         }
245 
246         const SlotVal* operator->() const {
247             return fTable->slot(fSlot);
248         }
249 
250         bool operator==(const Iter& that) const {
251             // Iterators from different tables shouldn't be compared against each other.
252             SkASSERT(fTable == that.fTable);
253             return fSlot == that.fSlot;
254         }
255 
256         bool operator!=(const Iter& that) const {
257             return !(*this == that);
258         }
259 
260         Iter& operator++() {
261             fSlot = fTable->nextPopulatedSlot(fSlot);
262             return *this;
263         }
264 
265         Iter operator++(int) {
266             Iter old = *this;
267             this->operator++();
268             return old;
269         }
270 
271     protected:
272         const TTable* fTable;
273         int fSlot;
274     };
275 
276 private:
277     // Finds the first non-empty slot for an iterator.
firstPopulatedSlot()278     int firstPopulatedSlot() const {
279         for (int i = 0; i < fCapacity; i++) {
280             if (fSlots[i].has_value()) {
281                 return i;
282             }
283         }
284         return fCapacity;
285     }
286 
287     // Increments an iterator's slot.
nextPopulatedSlot(int currentSlot)288     int nextPopulatedSlot(int currentSlot) const {
289         for (int i = currentSlot + 1; i < fCapacity; i++) {
290             if (fSlots[i].has_value()) {
291                 return i;
292             }
293         }
294         return fCapacity;
295     }
296 
297     // Reads from an iterator's slot.
slot(int i)298     const T* slot(int i) const {
299         SkASSERT(fSlots[i].has_value());
300         return &*fSlots[i];
301     }
302 
uncheckedSet(T && val)303     T* uncheckedSet(T&& val) {
304         const K& key = Traits::GetKey(val);
305         SkASSERT(key == key);
306         uint32_t hash = Hash(key);
307         int index = hash & (fCapacity-1);
308         for (int n = 0; n < fCapacity; n++) {
309             Slot& s = fSlots[index];
310             if (s.empty()) {
311                 // New entry.
312                 s.emplace(std::move(val), hash);
313                 fCount++;
314                 return &*s;
315             }
316             if (hash == s.fHash && key == Traits::GetKey(*s)) {
317                 // Overwrite previous entry.
318                 // Note: this triggers extra copies when adding the same value repeatedly.
319                 s.emplace(std::move(val), hash);
320                 return &*s;
321             }
322 
323             index = this->next(index);
324         }
325         SkASSERT(false);
326         return nullptr;
327     }
328 
removeSlot(int index)329     void removeSlot(int index) {
330         fCount--;
331 
332         // Rearrange elements to restore the invariants for linear probing.
333         for (;;) {
334             Slot& emptySlot = fSlots[index];
335             int emptyIndex = index;
336             int originalIndex;
337             // Look for an element that can be moved into the empty slot.
338             // If the empty slot is in between where an element landed, and its native slot, then
339             // move it to the empty slot. Don't move it if its native slot is in between where
340             // the element landed and the empty slot.
341             // [native] <= [empty] < [candidate] == GOOD, can move candidate to empty slot
342             // [empty] < [native] < [candidate] == BAD, need to leave candidate where it is
343             do {
344                 index = this->next(index);
345                 Slot& s = fSlots[index];
346                 if (s.empty()) {
347                     // We're done shuffling elements around.  Clear the last empty slot.
348                     emptySlot.reset();
349                     return;
350                 }
351                 originalIndex = s.fHash & (fCapacity - 1);
352             } while ((index <= originalIndex && originalIndex < emptyIndex)
353                      || (originalIndex < emptyIndex && emptyIndex < index)
354                      || (emptyIndex < index && index <= originalIndex));
355             // Move the element to the empty slot.
356             Slot& moveFrom = fSlots[index];
357             emptySlot = std::move(moveFrom);
358         }
359     }
360 
next(int index)361     int next(int index) const {
362         index--;
363         if (index < 0) { index += fCapacity; }
364         return index;
365     }
366 
Hash(const K & key)367     static uint32_t Hash(const K& key) {
368         uint32_t hash = Traits::Hash(key) & 0xffffffff;
369         return hash ? hash : 1;  // We reserve hash 0 to mark empty.
370     }
371 
372     class Slot {
373     public:
374         Slot() = default;
~Slot()375         ~Slot() { this->reset(); }
376 
Slot(const Slot & that)377         Slot(const Slot& that) { *this = that; }
378         Slot& operator=(const Slot& that) {
379             if (this == &that) {
380                 return *this;
381             }
382             if (fHash) {
383                 if (that.fHash) {
384                     fVal.fStorage = that.fVal.fStorage;
385                     fHash = that.fHash;
386                 } else {
387                     this->reset();
388                 }
389             } else {
390                 if (that.fHash) {
391                     new (&fVal.fStorage) T(that.fVal.fStorage);
392                     fHash = that.fHash;
393                 } else {
394                     // do nothing, no value on either side
395                 }
396             }
397             return *this;
398         }
399 
Slot(Slot && that)400         Slot(Slot&& that) { *this = std::move(that); }
401         Slot& operator=(Slot&& that) {
402             if (this == &that) {
403                 return *this;
404             }
405             if (fHash) {
406                 if (that.fHash) {
407                     fVal.fStorage = std::move(that.fVal.fStorage);
408                     fHash = that.fHash;
409                 } else {
410                     this->reset();
411                 }
412             } else {
413                 if (that.fHash) {
414                     new (&fVal.fStorage) T(std::move(that.fVal.fStorage));
415                     fHash = that.fHash;
416                 } else {
417                     // do nothing, no value on either side
418                 }
419             }
420             return *this;
421         }
422 
423         T& operator*() & { return fVal.fStorage; }
424         const T& operator*() const& { return fVal.fStorage; }
425         T&& operator*() && { return std::move(fVal.fStorage); }
426         const T&& operator*() const&& { return std::move(fVal.fStorage); }
427 
emplace(T && v,uint32_t h)428         Slot& emplace(T&& v, uint32_t h) {
429             this->reset();
430             new (&fVal.fStorage) T(std::move(v));
431             fHash = h;
432             return *this;
433         }
434 
has_value()435         bool has_value() const { return fHash != 0; }
436         explicit operator bool() const { return this->has_value(); }
empty()437         bool empty() const { return !this->has_value(); }
438 
reset()439         void reset() {
440             if (fHash) {
441                 fVal.fStorage.~T();
442                 fHash = 0;
443             }
444         }
445 
446         uint32_t fHash = 0;
447 
448     private:
449         union Storage {
450             T fStorage;
Storage()451             Storage() {}
~Storage()452             ~Storage() {}
453         } fVal;
454     };
455 
456     int fCount    = 0,
457         fCapacity = 0;
458     std::unique_ptr<Slot[]> fSlots;
459 };
460 
461 // Maps K->V.  A more user-friendly wrapper around THashTable, suitable for most use cases.
462 // K and V are treated as ordinary copyable C++ types, with no assumed relationship between the two.
463 template <typename K, typename V, typename HashK = SkGoodHash>
464 class THashMap {
465 public:
466     // Allow default construction and assignment.
467     THashMap() = default;
468 
469     THashMap(THashMap<K, V, HashK>&& that) = default;
470     THashMap(const THashMap<K, V, HashK>& that) = default;
471 
472     THashMap<K, V, HashK>& operator=(THashMap<K, V, HashK>&& that) = default;
473     THashMap<K, V, HashK>& operator=(const THashMap<K, V, HashK>& that) = default;
474 
475     // Construct with an initializer list of key-value pairs.
476     struct Pair : public std::pair<K, V> {
477         using std::pair<K, V>::pair;
GetKeyPair478         static const K& GetKey(const Pair& p) { return p.first; }
HashPair479         static auto Hash(const K& key) { return HashK()(key); }
480     };
481 
THashMap(std::initializer_list<Pair> pairs)482     THashMap(std::initializer_list<Pair> pairs) {
483         int capacity = pairs.size() >= 4 ? SkNextPow2(pairs.size() * 4 / 3)
484                                          : 4;
485         fTable.resize(capacity);
486         for (const Pair& p : pairs) {
487             fTable.set(p);
488         }
489     }
490 
491     // Clear the map.
reset()492     void reset() { fTable.reset(); }
493 
494     // How many key/value pairs are in the table?
count()495     int count() const { return fTable.count(); }
496 
497     // Is empty?
empty()498     bool empty() const { return fTable.count() == 0; }
499 
500     // Approximately how many bytes of memory do we use beyond sizeof(*this)?
approxBytesUsed()501     size_t approxBytesUsed() const { return fTable.approxBytesUsed(); }
502 
503     // Reserve extra capacity.
reserve(int n)504     void reserve(int n) { fTable.reserve(n); }
505 
506     // Exchange two hash maps.
swap(THashMap & that)507     void swap(THashMap& that) { fTable.swap(that.fTable); }
swap(THashMap && that)508     void swap(THashMap&& that) { fTable.swap(std::move(that.fTable)); }
509 
510     // N.B. The pointers returned by set() and find() are valid only until the next call to set().
511 
512     // Set key to val in the table, replacing any previous value with the same key.
513     // We copy both key and val, and return a pointer to the value copy now in the table.
set(K key,V val)514     V* set(K key, V val) {
515         Pair* out = fTable.set({std::move(key), std::move(val)});
516         return &out->second;
517     }
518 
519     // If there is key/value entry in the table with this key, return a pointer to the value.
520     // If not, return null.
find(const K & key)521     V* find(const K& key) const {
522         if (Pair* p = fTable.find(key)) {
523             return &p->second;
524         }
525         return nullptr;
526     }
527 
528     V& operator[](const K& key) {
529         if (V* val = this->find(key)) {
530             return *val;
531         }
532         return *this->set(key, V{});
533     }
534 
535     // Removes the key/value entry in the table with this key. Asserts if the key is not present.
remove(const K & key)536     void remove(const K& key) {
537         fTable.remove(key);
538     }
539 
540     // If the key exists in the table, removes it and returns true. Otherwise, returns false.
removeIfExists(const K & key)541     bool removeIfExists(const K& key) {
542         return fTable.removeIfExists(key);
543     }
544 
545     // Call fn on every key/value pair in the table.  You may mutate the value but not the key.
546     template <typename Fn,  // f(K, V*) or f(const K&, V*)
547               std::enable_if_t<std::is_invocable_v<Fn, K, V*>>* = nullptr>
foreach(Fn && fn)548     void foreach(Fn&& fn) {
549         fTable.foreach([&fn](Pair* p) { fn(p->first, &p->second); });
550     }
551 
552     // Call fn on every key/value pair in the table.  You may not mutate anything.
553     template <typename Fn,  // f(K, V), f(const K&, V), f(K, const V&) or f(const K&, const V&).
554               std::enable_if_t<std::is_invocable_v<Fn, K, V>>* = nullptr>
foreach(Fn && fn)555     void foreach(Fn&& fn) const {
556         fTable.foreach([&fn](const Pair& p) { fn(p.first, p.second); });
557     }
558 
559     // Call fn on every key/value pair in the table.  You may not mutate anything.
560     template <typename Fn,  // f(Pair), or f(const Pair&)
561               std::enable_if_t<std::is_invocable_v<Fn, Pair>>* = nullptr>
foreach(Fn && fn)562     void foreach(Fn&& fn) const {
563         fTable.foreach([&fn](const Pair& p) { fn(p); });
564     }
565 
566     // Dereferencing an iterator gives back a key-value pair, suitable for structured binding.
567     using Iter = typename THashTable<Pair, K>::template Iter<std::pair<K, V>>;
568 
begin()569     Iter begin() const {
570         return Iter::MakeBegin(&fTable);
571     }
572 
end()573     Iter end() const {
574         return Iter::MakeEnd(&fTable);
575     }
576 
577 private:
578     THashTable<Pair, K> fTable;
579 };
580 
581 // A set of T.  T is treated as an ordinary copyable C++ type.
582 template <typename T, typename HashT = SkGoodHash>
583 class THashSet {
584 public:
585     // Allow default construction and assignment.
586     THashSet() = default;
587 
588     THashSet(THashSet<T, HashT>&& that) = default;
589     THashSet(const THashSet<T, HashT>& that) = default;
590 
591     THashSet<T, HashT>& operator=(THashSet<T, HashT>&& that) = default;
592     THashSet<T, HashT>& operator=(const THashSet<T, HashT>& that) = default;
593 
594     // Construct with an initializer list of Ts.
THashSet(std::initializer_list<T> vals)595     THashSet(std::initializer_list<T> vals) {
596         int capacity = vals.size() >= 4 ? SkNextPow2(vals.size() * 4 / 3)
597                                         : 4;
598         fTable.resize(capacity);
599         for (const T& val : vals) {
600             fTable.set(val);
601         }
602     }
603 
604     // Clear the set.
reset()605     void reset() { fTable.reset(); }
606 
607     // How many items are in the set?
count()608     int count() const { return fTable.count(); }
609 
610     // Is empty?
empty()611     bool empty() const { return fTable.count() == 0; }
612 
613     // Approximately how many bytes of memory do we use beyond sizeof(*this)?
approxBytesUsed()614     size_t approxBytesUsed() const { return fTable.approxBytesUsed(); }
615 
616     // Reserve extra capacity.
reserve(int n)617     void reserve(int n) { fTable.reserve(n); }
618 
619     // Exchange two hash sets.
swap(THashSet & that)620     void swap(THashSet& that) { fTable.swap(that.fTable); }
swap(THashSet && that)621     void swap(THashSet&& that) { fTable.swap(std::move(that.fTable)); }
622 
623     // Copy an item into the set.
add(T item)624     void add(T item) { fTable.set(std::move(item)); }
625 
626     // Is this item in the set?
contains(const T & item)627     bool contains(const T& item) const { return SkToBool(this->find(item)); }
628 
629     // If an item equal to this is in the set, return a pointer to it, otherwise null.
630     // This pointer remains valid until the next call to add().
find(const T & item)631     const T* find(const T& item) const { return fTable.find(item); }
632 
633     // Remove the item in the set equal to this.
remove(const T & item)634     void remove(const T& item) {
635         SkASSERT(this->contains(item));
636         fTable.remove(item);
637     }
638 
639     // Call fn on every item in the set.  You may not mutate anything.
640     template <typename Fn>  // f(T), f(const T&)
foreach(Fn && fn)641     void foreach (Fn&& fn) const {
642         fTable.foreach(fn);
643     }
644 
645 private:
646     struct Traits {
GetKeyTraits647         static const T& GetKey(const T& item) { return item; }
HashTraits648         static auto Hash(const T& item) { return HashT()(item); }
649     };
650 
651 public:
652     using Iter = typename THashTable<T, T, Traits>::template Iter<T>;
653 
begin()654     Iter begin() const {
655         return Iter::MakeBegin(&fTable);
656     }
657 
end()658     Iter end() const {
659         return Iter::MakeEnd(&fTable);
660     }
661 
662 private:
663     THashTable<T, T, Traits> fTable;
664 };
665 
666 }  // namespace skia_private
667 
668 #endif  // SkTHash_DEFINED
669