// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef PARTITION_ALLOC_PARTITION_BUCKET_LOOKUP_H_
#define PARTITION_ALLOC_PARTITION_BUCKET_LOOKUP_H_

#include <array>
#include <bit>
#include <cstdint>
#include <utility>

#include "partition_alloc/partition_alloc_base/bits.h"
#include "partition_alloc/partition_alloc_base/compiler_specific.h"
#include "partition_alloc/partition_alloc_buildflags.h"
#include "partition_alloc/partition_alloc_check.h"
#include "partition_alloc/partition_alloc_constants.h"

namespace partition_alloc::internal {

// Don't use an anonymous namespace for the constants because it can inhibit
// collapsing them together, even when they are tagged as inline.

// Precalculate some shift and mask constants used in the hot path.
// Example: malloc(41) == 101001 in binary.
// The order is 6: 1 << (6 - 1) == 32 is the highest bit set.
// order_index is the next three most significant bits == 010 == 2.
// sub_order_index_mask is a mask for the remaining bits == 11 (masking to 01
// for the sub_order_index).
constexpr uint8_t OrderIndexShift(uint8_t order) {
  if (order < kNumBucketsPerOrderBits + 1) {
    return 0;
  }

  return order - (kNumBucketsPerOrderBits + 1);
}

constexpr size_t OrderSubIndexMask(uint8_t order) {
  if (order == kBitsPerSizeT) {
    return static_cast<size_t>(-1) >> (kNumBucketsPerOrderBits + 1);
  }

  return ((static_cast<size_t>(1) << order) - 1) >>
         (kNumBucketsPerOrderBits + 1);
}
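// Worked example, continuing the malloc(41) case above. Illustrative only; it
// assumes kNumBucketsPerOrderBits == 3, matching the "next three MSB" in that
// comment. Then OrderIndexShift(6) == 6 - (3 + 1) == 2 and
// OrderSubIndexMask(6) == ((1 << 6) - 1) >> (3 + 1) == 0b11, so
// (41 >> 2) & 0b111 == 2 recovers the order_index and 41 & 0b11 == 1 the
// sub_order_index.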

#if BUILDFLAG(HAS_64_BIT_POINTERS)
static_assert(kBitsPerSizeT == 64, "");
#else
static_assert(kBitsPerSizeT == 32, "");
#endif  // BUILDFLAG(HAS_64_BIT_POINTERS)

// Orders range from 0 to `kBitsPerSizeT`, inclusive.
inline constexpr uint8_t kNumOrders = kBitsPerSizeT + 1;

template <typename SizeClass, SizeClass... Index>
constexpr auto MakeOrderArray(SizeClass (*order_function)(uint8_t),
                              std::integer_sequence<SizeClass, Index...> seq) {
  return std::array{order_function(Index)...};
}

inline constexpr auto kOrderIndexShift =
    MakeOrderArray(OrderIndexShift,
                   std::make_integer_sequence<uint8_t, kNumOrders>{});

inline constexpr auto kOrderSubIndexMask =
    MakeOrderArray(OrderSubIndexMask,
                   std::make_integer_sequence<size_t, kNumOrders>{});

// The class used to generate the bucket lookup table at compile-time.
class BucketIndexLookup final {
 public:
  PA_ALWAYS_INLINE static constexpr uint16_t GetIndexForNeutralBuckets(
      size_t size);
  PA_ALWAYS_INLINE static constexpr uint16_t GetIndexForDenserBuckets(
      size_t size);
  PA_ALWAYS_INLINE static constexpr uint16_t GetIndex(size_t size);

  constexpr BucketIndexLookup() {
    constexpr uint16_t sentinel_bucket_index = kNumBuckets;

    InitBucketSizes();

    uint16_t* bucket_index_ptr = &bucket_index_lookup_[0];
    uint16_t bucket_index = 0;

    // Very small allocations, smaller than the first bucketed order ->
    // everything goes to the first bucket.
    for (uint8_t order = 0; order < kMinBucketedOrder; ++order) {
      for (uint16_t j = 0; j < kNumBucketsPerOrder; ++j) {
        *bucket_index_ptr++ = 0;
      }
    }

    // Normal buckets.
    for (uint8_t order = kMinBucketedOrder; order <= kMaxBucketedOrder;
         ++order) {
      size_t size = static_cast<size_t>(1) << (order - 1);
      size_t current_increment = size >> kNumBucketsPerOrderBits;
      for (uint16_t j = 0; j < kNumBucketsPerOrder; ++j) {
        *bucket_index_ptr++ = bucket_index;

        // For small sizes, buckets are close together (current_increment is
        // small). For instance, for:
        // - kAlignment == 16 (which is the case on most 64-bit systems)
        // - kNumBucketsPerOrder == 4
        //
        // The next 3 buckets after 16 are {20, 24, 28}. None of these is a
        // multiple of kAlignment, so they all map to the next valid bucket,
        // which is 32 here.
        if (size % kAlignment != 0) {
          PA_DCHECK(bucket_sizes_[bucket_index] > size);
          // Do not increment bucket_index: in the example above, size may be
          // 20 while bucket_sizes_[bucket_index] == 32.
        } else {
          PA_DCHECK(bucket_sizes_[bucket_index] == size);
          bucket_index++;
        }

        size += current_increment;
      }
    }

    // Direct-mapped, and overflow.
    for (uint8_t order = kMaxBucketedOrder + 1; order <= kBitsPerSizeT;
         ++order) {
      for (uint16_t j = 0; j < kNumBucketsPerOrder; ++j) {
        *bucket_index_ptr++ = sentinel_bucket_index;
      }
    }

    // Smaller because some buckets are not valid due to alignment constraints.
    PA_DCHECK(bucket_index < kNumBuckets);
    PA_DCHECK(bucket_index_ptr == bucket_index_lookup_ + ((kBitsPerSizeT + 1) *
                                                          kNumBucketsPerOrder));
    // And there's one last bucket lookup that will be hit for e.g. malloc(-1),
    // which tries to overflow to a non-existent order.
    *bucket_index_ptr = sentinel_bucket_index;
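    // At this point the table has three regions: slots for orders below
    // kMinBucketedOrder all map to bucket 0, slots for the bucketed orders
    // map to increasing bucket indices (an index repeats whenever the
    // candidate size is not a multiple of kAlignment), and slots for larger
    // orders, plus the final overflow slot, map to the sentinel bucket.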
  }
  constexpr const size_t* bucket_sizes() const { return &bucket_sizes_[0]; }

 private:
  constexpr void InitBucketSizes() {
    size_t current_size = kSmallestBucket;
    size_t current_increment = kSmallestBucket >> kNumBucketsPerOrderBits;
    size_t* bucket_size = &bucket_sizes_[0];
    for (size_t i = 0; i < kNumBucketedOrders; ++i) {
      for (size_t j = 0; j < kNumBucketsPerOrder; ++j) {
        // All bucket sizes have to be multiples of kAlignment, skip otherwise.
        if (current_size % kAlignment == 0) {
          *bucket_size = current_size;
          ++bucket_size;
        }
        current_size += current_increment;
      }
      current_increment <<= 1;
    }

    // The remaining buckets are invalid.
    while (bucket_size < bucket_sizes_ + kNumBuckets) {
      *(bucket_size++) = kInvalidBucketSize;
    }
  }
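  // For illustration only, under the same assumptions as the constructor's
  // example above (kAlignment == 16, kNumBucketsPerOrder == 4) plus an assumed
  // smallest bucket of 16: InitBucketSizes() would emit 16 (skipping 20, 24
  // and 28), then 32 and 48 for the next order (skipping 40 and 56), then 64,
  // 80, 96, 112, and so on, with every remaining slot set to
  // kInvalidBucketSize.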

  size_t bucket_sizes_[kNumBuckets]{};
  // The bucket lookup table lets us map a size_t to a bucket quickly.
  // The trailing +1 caters for the overflow case for very large allocation
  // sizes. It is one flat array instead of a 2D array because in the 2D
  // world, we'd need to index array[order][kNumBucketsPerOrder], i.e. one past
  // the end of a row, which risks undefined behavior.
  uint16_t
      bucket_index_lookup_[((kBitsPerSizeT + 1) * kNumBucketsPerOrder) + 1]{};
};

PA_ALWAYS_INLINE constexpr size_t RoundUpSize(size_t size) {
  const size_t next_power = std::bit_ceil(size);
  const size_t prev_power = next_power >> 1;
  PA_DCHECK(size <= next_power);
  PA_DCHECK(prev_power < size);
  if (size <= prev_power * 5 / 4) {
    return prev_power * 5 / 4;
  } else {
    return next_power;
  }
}
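// Illustrative values, following directly from the arithmetic above:
// RoundUpSize(1024) == 1024, RoundUpSize(1200) == 1280 (1.25 * 1024), and
// RoundUpSize(1400) == 2048. That is, sizes round up to either 1.25x the
// previous power of two or to the next power of two.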

PA_ALWAYS_INLINE constexpr uint16_t RoundUpToOdd(uint16_t size) {
  return (size % 2 == 0) + size;
}
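// E.g. RoundUpToOdd(6) == 7 and RoundUpToOdd(7) == 7: even values are bumped
// to the next odd value, odd values are returned unchanged.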

// static
PA_ALWAYS_INLINE constexpr uint16_t BucketIndexLookup::GetIndexForDenserBuckets(
    size_t size) {
  // This forces the bucket table to be constant-initialized and immediately
  // materialized in the binary.
  constexpr BucketIndexLookup lookup{};
  const size_t order =
      kBitsPerSizeT - static_cast<size_t>(std::countl_zero(size));
  // The order index is simply the next few bits after the most significant
  // bit.
  const size_t order_index =
      (size >> kOrderIndexShift[order]) & (kNumBucketsPerOrder - 1);
  // And if the remaining bits are non-zero we must bump the bucket up.
  const size_t sub_order_index = size & kOrderSubIndexMask[order];
  const uint16_t index =
      lookup.bucket_index_lookup_[(order << kNumBucketsPerOrderBits) +
                                  order_index + !!sub_order_index];
  PA_DCHECK(index <= kNumBuckets);  // Last one is the sentinel bucket.
  return index;
}
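// Illustrative walk-through for size == 41 on a 64-bit build, assuming
// kNumBucketsPerOrderBits == 3 as in the comment near the top of this file:
// order == 64 - 58 == 6, order_index == (41 >> kOrderIndexShift[6]) & 7 == 2,
// and the non-zero sub_order_index (41 & kOrderSubIndexMask[6] == 1) bumps the
// lookup to slot (6 << 3) + 2 + 1 == 51 of the table.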

// static
PA_ALWAYS_INLINE constexpr uint16_t
BucketIndexLookup::GetIndexForNeutralBuckets(size_t size) {
  const auto index = GetIndexForDenserBuckets(size);
  // Below the minimum size, the 4- and 8-bucket distributions are the same,
  // since we can't fit any more buckets per order; this is due to alignment
  // requirements: each bucket must be a multiple of the alignment, which
  // implies the difference between buckets must also be a multiple of the
  // alignment. In smaller orders, this limits the number of buckets we can
  // have per order. So, for these small orders, we do not want to skip every
  // second bucket.
  //
  // We also do not want to go above the index for the max bucketed size.
  if (size > kAlignment * kNumBucketsPerOrder &&
      index < GetIndexForDenserBuckets(kMaxBucketed)) {
    return RoundUpToOdd(index);
  } else {
    return index;
  }
}
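// For instance, if the denser distribution mapped a mid-sized request to
// index 6 (illustrative value only), the neutral distribution would return
// RoundUpToOdd(6) == 7, i.e. each even-indexed bucket collapses into its
// odd-indexed neighbor.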

// static
PA_ALWAYS_INLINE constexpr uint16_t BucketIndexLookup::GetIndex(size_t size) {
  // For any order 2^N, under the denser bucket distribution ("Distribution A"),
  // we have 4 evenly distributed buckets: 2^N, 1.25*2^N, 1.5*2^N, and 1.75*2^N.
  // These numbers represent the maximum size of an allocation that can go into
  // a given bucket.
  //
  // Under the less dense bucket distribution ("Distribution B"), we only have
  // 2 buckets for the same order 2^N: 2^N and 1.25*2^N.
  //
  // Everything that would be mapped to the last two buckets of an order under
  // Distribution A is instead mapped to the first bucket of the next order
  // under Distribution B. The following diagram shows roughly what this looks
  // like for the order starting at 2^10, as an example.
  //
  // A: ... | 2^10 | 1.25*2^10 | 1.5*2^10 | 1.75*2^10 | 2^11 | ...
  // B: ... | 2^10 | 1.25*2^10 | -------- | --------- | 2^11 | ...
  //
  // So, an allocation of size 1.4*2^10 would go into the 1.5*2^10 bucket under
  // Distribution A, but into the 2^11 bucket under Distribution B.
  if (1 << 8 < size && size < kHighThresholdForAlternateDistribution) {
    return BucketIndexLookup::GetIndexForNeutralBuckets(RoundUpSize(size));
  }
  return BucketIndexLookup::GetIndexForNeutralBuckets(size);
}
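// Tying it together: a request of 1.4 * 2^10 bytes falls inside the window
// above (assuming kHighThresholdForAlternateDistribution is larger than that
// size), so it is first rounded up by RoundUpSize() to 2^11 and only then
// mapped to a bucket index, matching the Distribution B diagram above.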

}  // namespace partition_alloc::internal

#endif  // PARTITION_ALLOC_PARTITION_BUCKET_LOOKUP_H_