1 //===- llvm/ADT/SmallPtrSet.h - 'Normally small' pointer set ----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file defines the SmallPtrSet class.  See the doxygen comment for
11 /// SmallPtrSetImplBase for more details on the algorithm used.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_ADT_SMALLPTRSET_H
16 #define LLVM_ADT_SMALLPTRSET_H
17 
18 #include "llvm/ADT/EpochTracker.h"
19 #include "llvm/Support/Compiler.h"
20 #include "llvm/Support/ReverseIteration.h"
21 #include "llvm/Support/type_traits.h"
22 #include <cassert>
23 #include <cstddef>
24 #include <cstdlib>
25 #include <cstring>
26 #include <initializer_list>
27 #include <iterator>
28 #include <limits>
29 #include <utility>
30 
31 namespace llvm {
32 
33 /// SmallPtrSetImplBase - This is the common code shared among all the
34 /// SmallPtrSet<>'s, which is almost everything.  SmallPtrSet has two modes, one
35 /// for small and one for large sets.
36 ///
37 /// Small sets use an array of pointers allocated in the SmallPtrSet object,
38 /// which is treated as a simple array of pointers.  When a pointer is added to
39 /// the set, the array is scanned to see if the element already exists, if not
40 /// the element is 'pushed back' onto the array.  If we run out of space in the
41 /// array, we grow into the 'large set' case.  SmallSet should be used when the
42 /// sets are often small.  In this case, no memory allocation is used, and only
43 /// light-weight and cache-efficient scanning is used.
44 ///
45 /// Large sets use a classic exponentially-probed hash table.  Empty buckets are
46 /// represented with an illegal pointer value (-1) to allow null pointers to be
47 /// inserted.  Tombstones are represented with another illegal pointer value
48 /// (-2), to allow deletion.  The hash table is resized when the table is 3/4 or
49 /// more.  When this happens, the table is doubled in size.
50 ///
51 class SmallPtrSetImplBase : public DebugEpochBase {
52   friend class SmallPtrSetIteratorImpl;
53 
54 protected:
55   /// SmallArray - Points to a fixed size set of buckets, used in 'small mode'.
56   const void **SmallArray;
57   /// CurArray - This is the current set of buckets.  If equal to SmallArray,
58   /// then the set is in 'small mode'.
59   const void **CurArray;
60   /// CurArraySize - The allocated size of CurArray, always a power of two.
61   unsigned CurArraySize;
62 
63   /// Number of elements in CurArray that contain a value or are a tombstone.
64   /// If small, all these elements are at the beginning of CurArray and the rest
65   /// is uninitialized.
66   unsigned NumNonEmpty;
67   /// Number of tombstones in CurArray.
68   unsigned NumTombstones;
69 
70   // Helpers to copy and move construct a SmallPtrSet.
71   SmallPtrSetImplBase(const void **SmallStorage,
72                       const SmallPtrSetImplBase &that);
73   SmallPtrSetImplBase(const void **SmallStorage, unsigned SmallSize,
74                       SmallPtrSetImplBase &&that);
75 
SmallPtrSetImplBase(const void ** SmallStorage,unsigned SmallSize)76   explicit SmallPtrSetImplBase(const void **SmallStorage, unsigned SmallSize)
77       : SmallArray(SmallStorage), CurArray(SmallStorage),
78         CurArraySize(SmallSize), NumNonEmpty(0), NumTombstones(0) {
79     assert(SmallSize && (SmallSize & (SmallSize-1)) == 0 &&
80            "Initial size must be a power of two!");
81   }
82 
~SmallPtrSetImplBase()83   ~SmallPtrSetImplBase() {
84     if (!isSmall())
85       free(CurArray);
86   }
87 
88 public:
89   using size_type = unsigned;
90 
91   SmallPtrSetImplBase &operator=(const SmallPtrSetImplBase &) = delete;
92 
empty()93   [[nodiscard]] bool empty() const { return size() == 0; }
size()94   size_type size() const { return NumNonEmpty - NumTombstones; }
95 
clear()96   void clear() {
97     incrementEpoch();
98     // If the capacity of the array is huge, and the # elements used is small,
99     // shrink the array.
100     if (!isSmall()) {
101       if (size() * 4 < CurArraySize && CurArraySize > 32)
102         return shrink_and_clear();
103       // Fill the array with empty markers.
104       memset(CurArray, -1, CurArraySize * sizeof(void *));
105     }
106 
107     NumNonEmpty = 0;
108     NumTombstones = 0;
109   }
110 
111 protected:
getTombstoneMarker()112   static void *getTombstoneMarker() { return reinterpret_cast<void*>(-2); }
113 
getEmptyMarker()114   static void *getEmptyMarker() {
115     // Note that -1 is chosen to make clear() efficiently implementable with
116     // memset and because it's not a valid pointer value.
117     return reinterpret_cast<void*>(-1);
118   }
119 
EndPointer()120   const void **EndPointer() const {
121     return isSmall() ? CurArray + NumNonEmpty : CurArray + CurArraySize;
122   }
123 
124   /// insert_imp - This returns true if the pointer was new to the set, false if
125   /// it was already in the set.  This is hidden from the client so that the
126   /// derived class can check that the right type of pointer is passed in.
insert_imp(const void * Ptr)127   std::pair<const void *const *, bool> insert_imp(const void *Ptr) {
128     if (isSmall()) {
129       // Check to see if it is already in the set.
130       const void **LastTombstone = nullptr;
131       for (const void **APtr = SmallArray, **E = SmallArray + NumNonEmpty;
132            APtr != E; ++APtr) {
133         const void *Value = *APtr;
134         if (Value == Ptr)
135           return std::make_pair(APtr, false);
136         if (Value == getTombstoneMarker())
137           LastTombstone = APtr;
138       }
139 
140       // Did we find any tombstone marker?
141       if (LastTombstone != nullptr) {
142         *LastTombstone = Ptr;
143         --NumTombstones;
144         incrementEpoch();
145         return std::make_pair(LastTombstone, true);
146       }
147 
148       // Nope, there isn't.  If we stay small, just 'pushback' now.
149       if (NumNonEmpty < CurArraySize) {
150         SmallArray[NumNonEmpty++] = Ptr;
151         incrementEpoch();
152         return std::make_pair(SmallArray + (NumNonEmpty - 1), true);
153       }
154       // Otherwise, hit the big set case, which will call grow.
155     }
156     return insert_imp_big(Ptr);
157   }
158 
159   /// erase_imp - If the set contains the specified pointer, remove it and
160   /// return true, otherwise return false.  This is hidden from the client so
161   /// that the derived class can check that the right type of pointer is passed
162   /// in.
erase_imp(const void * Ptr)163   bool erase_imp(const void * Ptr) {
164     const void *const *P = find_imp(Ptr);
165     if (P == EndPointer())
166       return false;
167 
168     const void **Loc = const_cast<const void **>(P);
169     assert(*Loc == Ptr && "broken find!");
170     *Loc = getTombstoneMarker();
171     NumTombstones++;
172     return true;
173   }
174 
175   /// Returns the raw pointer needed to construct an iterator.  If element not
176   /// found, this will be EndPointer.  Otherwise, it will be a pointer to the
177   /// slot which stores Ptr;
find_imp(const void * Ptr)178   const void *const * find_imp(const void * Ptr) const {
179     if (isSmall()) {
180       // Linear search for the item.
181       for (const void *const *APtr = SmallArray,
182                       *const *E = SmallArray + NumNonEmpty; APtr != E; ++APtr)
183         if (*APtr == Ptr)
184           return APtr;
185       return EndPointer();
186     }
187 
188     // Big set case.
189     auto *Bucket = FindBucketFor(Ptr);
190     if (*Bucket == Ptr)
191       return Bucket;
192     return EndPointer();
193   }
194 
195 private:
isSmall()196   bool isSmall() const { return CurArray == SmallArray; }
197 
198   std::pair<const void *const *, bool> insert_imp_big(const void *Ptr);
199 
200   const void * const *FindBucketFor(const void *Ptr) const;
201   void shrink_and_clear();
202 
203   /// Grow - Allocate a larger backing store for the buckets and move it over.
204   void Grow(unsigned NewSize);
205 
206 protected:
207   /// swap - Swaps the elements of two sets.
208   /// Note: This method assumes that both sets have the same small size.
209   void swap(SmallPtrSetImplBase &RHS);
210 
211   void CopyFrom(const SmallPtrSetImplBase &RHS);
212   void MoveFrom(unsigned SmallSize, SmallPtrSetImplBase &&RHS);
213 
214 private:
215   /// Code shared by MoveFrom() and move constructor.
216   void MoveHelper(unsigned SmallSize, SmallPtrSetImplBase &&RHS);
217   /// Code shared by CopyFrom() and copy constructor.
218   void CopyHelper(const SmallPtrSetImplBase &RHS);
219 };
220 
221 /// SmallPtrSetIteratorImpl - This is the common base class shared between all
222 /// instances of SmallPtrSetIterator.
223 class SmallPtrSetIteratorImpl {
224 protected:
225   const void *const *Bucket;
226   const void *const *End;
227 
228 public:
SmallPtrSetIteratorImpl(const void * const * BP,const void * const * E)229   explicit SmallPtrSetIteratorImpl(const void *const *BP, const void*const *E)
230     : Bucket(BP), End(E) {
231     if (shouldReverseIterate()) {
232       RetreatIfNotValid();
233       return;
234     }
235     AdvanceIfNotValid();
236   }
237 
238   bool operator==(const SmallPtrSetIteratorImpl &RHS) const {
239     return Bucket == RHS.Bucket;
240   }
241   bool operator!=(const SmallPtrSetIteratorImpl &RHS) const {
242     return Bucket != RHS.Bucket;
243   }
244 
245 protected:
246   /// AdvanceIfNotValid - If the current bucket isn't valid, advance to a bucket
247   /// that is.   This is guaranteed to stop because the end() bucket is marked
248   /// valid.
AdvanceIfNotValid()249   void AdvanceIfNotValid() {
250     assert(Bucket <= End);
251     while (Bucket != End &&
252            (*Bucket == SmallPtrSetImplBase::getEmptyMarker() ||
253             *Bucket == SmallPtrSetImplBase::getTombstoneMarker()))
254       ++Bucket;
255   }
RetreatIfNotValid()256   void RetreatIfNotValid() {
257     assert(Bucket >= End);
258     while (Bucket != End &&
259            (Bucket[-1] == SmallPtrSetImplBase::getEmptyMarker() ||
260             Bucket[-1] == SmallPtrSetImplBase::getTombstoneMarker())) {
261       --Bucket;
262     }
263   }
264 };
265 
266 /// SmallPtrSetIterator - This implements a const_iterator for SmallPtrSet.
267 template <typename PtrTy>
268 class LLVM_DEBUGEPOCHBASE_HANDLEBASE_EMPTYBASE SmallPtrSetIterator
269     : public SmallPtrSetIteratorImpl,
270       DebugEpochBase::HandleBase {
271   using PtrTraits = PointerLikeTypeTraits<PtrTy>;
272 
273 public:
274   using value_type = PtrTy;
275   using reference = PtrTy;
276   using pointer = PtrTy;
277   using difference_type = std::ptrdiff_t;
278   using iterator_category = std::forward_iterator_tag;
279 
SmallPtrSetIterator(const void * const * BP,const void * const * E,const DebugEpochBase & Epoch)280   explicit SmallPtrSetIterator(const void *const *BP, const void *const *E,
281                                const DebugEpochBase &Epoch)
282       : SmallPtrSetIteratorImpl(BP, E), DebugEpochBase::HandleBase(&Epoch) {}
283 
284   // Most methods are provided by the base class.
285 
286   const PtrTy operator*() const {
287     assert(isHandleInSync() && "invalid iterator access!");
288     if (shouldReverseIterate()) {
289       assert(Bucket > End);
290       return PtrTraits::getFromVoidPointer(const_cast<void *>(Bucket[-1]));
291     }
292     assert(Bucket < End);
293     return PtrTraits::getFromVoidPointer(const_cast<void*>(*Bucket));
294   }
295 
296   inline SmallPtrSetIterator& operator++() {          // Preincrement
297     assert(isHandleInSync() && "invalid iterator access!");
298     if (shouldReverseIterate()) {
299       --Bucket;
300       RetreatIfNotValid();
301       return *this;
302     }
303     ++Bucket;
304     AdvanceIfNotValid();
305     return *this;
306   }
307 
308   SmallPtrSetIterator operator++(int) {        // Postincrement
309     SmallPtrSetIterator tmp = *this;
310     ++*this;
311     return tmp;
312   }
313 };
314 
315 /// A templated base class for \c SmallPtrSet which provides the
316 /// typesafe interface that is common across all small sizes.
317 ///
318 /// This is particularly useful for passing around between interface boundaries
319 /// to avoid encoding a particular small size in the interface boundary.
320 template <typename PtrType>
321 class SmallPtrSetImpl : public SmallPtrSetImplBase {
322   using ConstPtrType = typename add_const_past_pointer<PtrType>::type;
323   using PtrTraits = PointerLikeTypeTraits<PtrType>;
324   using ConstPtrTraits = PointerLikeTypeTraits<ConstPtrType>;
325 
326 protected:
327   // Forward constructors to the base.
328   using SmallPtrSetImplBase::SmallPtrSetImplBase;
329 
330 public:
331   using iterator = SmallPtrSetIterator<PtrType>;
332   using const_iterator = SmallPtrSetIterator<PtrType>;
333   using key_type = ConstPtrType;
334   using value_type = PtrType;
335 
336   SmallPtrSetImpl(const SmallPtrSetImpl &) = delete;
337 
338   /// Inserts Ptr if and only if there is no element in the container equal to
339   /// Ptr. The bool component of the returned pair is true if and only if the
340   /// insertion takes place, and the iterator component of the pair points to
341   /// the element equal to Ptr.
insert(PtrType Ptr)342   std::pair<iterator, bool> insert(PtrType Ptr) {
343     auto p = insert_imp(PtrTraits::getAsVoidPointer(Ptr));
344     return std::make_pair(makeIterator(p.first), p.second);
345   }
346 
347   /// Insert the given pointer with an iterator hint that is ignored. This is
348   /// identical to calling insert(Ptr), but allows SmallPtrSet to be used by
349   /// std::insert_iterator and std::inserter().
insert(iterator,PtrType Ptr)350   iterator insert(iterator, PtrType Ptr) {
351     return insert(Ptr).first;
352   }
353 
354   /// erase - If the set contains the specified pointer, remove it and return
355   /// true, otherwise return false.
erase(PtrType Ptr)356   bool erase(PtrType Ptr) {
357     return erase_imp(PtrTraits::getAsVoidPointer(Ptr));
358   }
359   /// count - Return 1 if the specified pointer is in the set, 0 otherwise.
count(ConstPtrType Ptr)360   size_type count(ConstPtrType Ptr) const {
361     return find_imp(ConstPtrTraits::getAsVoidPointer(Ptr)) != EndPointer();
362   }
find(ConstPtrType Ptr)363   iterator find(ConstPtrType Ptr) const {
364     return makeIterator(find_imp(ConstPtrTraits::getAsVoidPointer(Ptr)));
365   }
contains(ConstPtrType Ptr)366   bool contains(ConstPtrType Ptr) const {
367     return find_imp(ConstPtrTraits::getAsVoidPointer(Ptr)) != EndPointer();
368   }
369 
370   template <typename IterT>
insert(IterT I,IterT E)371   void insert(IterT I, IterT E) {
372     for (; I != E; ++I)
373       insert(*I);
374   }
375 
insert(std::initializer_list<PtrType> IL)376   void insert(std::initializer_list<PtrType> IL) {
377     insert(IL.begin(), IL.end());
378   }
379 
begin()380   iterator begin() const {
381     if (shouldReverseIterate())
382       return makeIterator(EndPointer() - 1);
383     return makeIterator(CurArray);
384   }
end()385   iterator end() const { return makeIterator(EndPointer()); }
386 
387 private:
388   /// Create an iterator that dereferences to same place as the given pointer.
makeIterator(const void * const * P)389   iterator makeIterator(const void *const *P) const {
390     if (shouldReverseIterate())
391       return iterator(P == EndPointer() ? CurArray : P + 1, CurArray, *this);
392     return iterator(P, EndPointer(), *this);
393   }
394 };
395 
396 /// Equality comparison for SmallPtrSet.
397 ///
398 /// Iterates over elements of LHS confirming that each value from LHS is also in
399 /// RHS, and that no additional values are in RHS.
400 template <typename PtrType>
401 bool operator==(const SmallPtrSetImpl<PtrType> &LHS,
402                 const SmallPtrSetImpl<PtrType> &RHS) {
403   if (LHS.size() != RHS.size())
404     return false;
405 
406   for (const auto *KV : LHS)
407     if (!RHS.count(KV))
408       return false;
409 
410   return true;
411 }
412 
413 /// Inequality comparison for SmallPtrSet.
414 ///
415 /// Equivalent to !(LHS == RHS).
416 template <typename PtrType>
417 bool operator!=(const SmallPtrSetImpl<PtrType> &LHS,
418                 const SmallPtrSetImpl<PtrType> &RHS) {
419   return !(LHS == RHS);
420 }
421 
422 /// SmallPtrSet - This class implements a set which is optimized for holding
423 /// SmallSize or less elements.  This internally rounds up SmallSize to the next
424 /// power of two if it is not already a power of two.  See the comments above
425 /// SmallPtrSetImplBase for details of the algorithm.
426 template<class PtrType, unsigned SmallSize>
427 class SmallPtrSet : public SmallPtrSetImpl<PtrType> {
428   // In small mode SmallPtrSet uses linear search for the elements, so it is
429   // not a good idea to choose this value too high. You may consider using a
430   // DenseSet<> instead if you expect many elements in the set.
431   static_assert(SmallSize <= 32, "SmallSize should be small");
432 
433   using BaseT = SmallPtrSetImpl<PtrType>;
434 
435   // A constexpr version of llvm::bit_ceil.
436   // TODO: Replace this with std::bit_ceil once C++20 is available.
RoundUpToPowerOfTwo(size_t X)437   static constexpr size_t RoundUpToPowerOfTwo(size_t X) {
438     size_t C = 1;
439     size_t CMax = C << (std::numeric_limits<size_t>::digits - 1);
440     while (C < X && C < CMax)
441       C <<= 1;
442     return C;
443   }
444 
445   // Make sure that SmallSize is a power of two, round up if not.
446   static constexpr size_t SmallSizePowTwo = RoundUpToPowerOfTwo(SmallSize);
447   /// SmallStorage - Fixed size storage used in 'small mode'.
448   const void *SmallStorage[SmallSizePowTwo];
449 
450 public:
SmallPtrSet()451   SmallPtrSet() : BaseT(SmallStorage, SmallSizePowTwo) {}
SmallPtrSet(const SmallPtrSet & that)452   SmallPtrSet(const SmallPtrSet &that) : BaseT(SmallStorage, that) {}
SmallPtrSet(SmallPtrSet && that)453   SmallPtrSet(SmallPtrSet &&that)
454       : BaseT(SmallStorage, SmallSizePowTwo, std::move(that)) {}
455 
456   template<typename It>
SmallPtrSet(It I,It E)457   SmallPtrSet(It I, It E) : BaseT(SmallStorage, SmallSizePowTwo) {
458     this->insert(I, E);
459   }
460 
SmallPtrSet(std::initializer_list<PtrType> IL)461   SmallPtrSet(std::initializer_list<PtrType> IL)
462       : BaseT(SmallStorage, SmallSizePowTwo) {
463     this->insert(IL.begin(), IL.end());
464   }
465 
466   SmallPtrSet<PtrType, SmallSize> &
467   operator=(const SmallPtrSet<PtrType, SmallSize> &RHS) {
468     if (&RHS != this)
469       this->CopyFrom(RHS);
470     return *this;
471   }
472 
473   SmallPtrSet<PtrType, SmallSize> &
474   operator=(SmallPtrSet<PtrType, SmallSize> &&RHS) {
475     if (&RHS != this)
476       this->MoveFrom(SmallSizePowTwo, std::move(RHS));
477     return *this;
478   }
479 
480   SmallPtrSet<PtrType, SmallSize> &
481   operator=(std::initializer_list<PtrType> IL) {
482     this->clear();
483     this->insert(IL.begin(), IL.end());
484     return *this;
485   }
486 
487   /// swap - Swaps the elements of two sets.
swap(SmallPtrSet<PtrType,SmallSize> & RHS)488   void swap(SmallPtrSet<PtrType, SmallSize> &RHS) {
489     SmallPtrSetImplBase::swap(RHS);
490   }
491 };
492 
493 } // end namespace llvm
494 
495 namespace std {
496 
497   /// Implement std::swap in terms of SmallPtrSet swap.
498   template<class T, unsigned N>
swap(llvm::SmallPtrSet<T,N> & LHS,llvm::SmallPtrSet<T,N> & RHS)499   inline void swap(llvm::SmallPtrSet<T, N> &LHS, llvm::SmallPtrSet<T, N> &RHS) {
500     LHS.swap(RHS);
501   }
502 
503 } // end namespace std
504 
505 #endif // LLVM_ADT_SMALLPTRSET_H
506