1 //===-- primary64.h ---------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef SCUDO_PRIMARY64_H_
10 #define SCUDO_PRIMARY64_H_
11 
12 #include "allocator_common.h"
13 #include "bytemap.h"
14 #include "common.h"
15 #include "condition_variable.h"
16 #include "list.h"
17 #include "local_cache.h"
18 #include "mem_map.h"
19 #include "memtag.h"
20 #include "options.h"
21 #include "release.h"
22 #include "stats.h"
23 #include "string_utils.h"
24 #include "thread_annotations.h"
25 
26 namespace scudo {
27 
28 // SizeClassAllocator64 is an allocator tuned for 64-bit address space.
29 //
30 // It starts by reserving NumClasses * 2^RegionSizeLog bytes, equally divided in
31 // Regions, specific to each size class. Note that the base of that mapping is
32 // random (based on the platform-specific map() capabilities). If
33 // PrimaryEnableRandomOffset is set, each Region actually starts at a random
34 // offset from its base.
35 //
36 // Regions are mapped incrementally on demand to fulfill allocation requests,
37 // those mappings being split into equally sized Blocks based on the size class
38 // they belong to. The Blocks created are shuffled to prevent predictable
39 // address patterns (the predictability increases with the size of the Blocks).
40 //
41 // The 1st Region (for size class 0) holds the TransferBatches. This is a
42 // structure used to transfer arrays of available pointers from the size class
43 // freelist to the thread-specific freelist, and back.
44 //
45 // The memory used by this allocator is never unmapped, but can be partially
46 // released if the platform allows for it.
47 
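// A minimal worked example of the layout described above, using assumed values
// (RegionSizeLog == 28, i.e. 256 MiB Regions, and 32 classes); the real values
// come from the Config and SizeClassMap and may differ.
//
//   Total reservation = NumClasses * 2^RegionSizeLog = 32 * 256 MiB = 8 GiB
//   Region i base     = PrimaryBase + (i << RegionSizeLog)
//                       e.g. class 3 starts 768 MiB past PrimaryBase
//
// The checks below only verify the illustrative arithmetic, not the allocator.
static_assert(32ULL * (1ULL << 28) == (8ULL << 30),
              "32 regions of 256 MiB reserve 8 GiB in total");
static_assert((3ULL << 28) == (768ULL << 20),
              "class 3's region starts 768 MiB past the primary base");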
48 template <typename Config> class SizeClassAllocator64 {
49 public:
50   typedef typename Config::CompactPtrT CompactPtrT;
51   typedef typename Config::SizeClassMap SizeClassMap;
52   typedef typename Config::ConditionVariableT ConditionVariableT;
53   static const uptr CompactPtrScale = Config::getCompactPtrScale();
54   static const uptr RegionSizeLog = Config::getRegionSizeLog();
55   static const uptr GroupSizeLog = Config::getGroupSizeLog();
56   static_assert(RegionSizeLog >= GroupSizeLog,
57                 "Group size shouldn't be greater than the region size");
58   static const uptr GroupScale = GroupSizeLog - CompactPtrScale;
59   typedef SizeClassAllocator64<Config> ThisT;
60   typedef SizeClassAllocatorLocalCache<ThisT> CacheT;
61   typedef TransferBatch<ThisT> TransferBatchT;
62   typedef BatchGroup<ThisT> BatchGroupT;
63 
64   // BatchClass is used to store internal metadata so it needs to be at least as
65   // large as the largest data structure.
66   static uptr getSizeByClassId(uptr ClassId) {
67     return (ClassId == SizeClassMap::BatchClassId)
68                ? roundUp(Max(sizeof(TransferBatchT), sizeof(BatchGroupT)),
69                          1U << CompactPtrScale)
70                : SizeClassMap::getSizeByClassId(ClassId);
71   }
72 
73   static bool canAllocate(uptr Size) { return Size <= SizeClassMap::MaxSize; }
74 
75   static bool conditionVariableEnabled() {
76     return Config::hasConditionVariableT();
77   }
78 
79   void init(s32 ReleaseToOsInterval) NO_THREAD_SAFETY_ANALYSIS {
80     DCHECK(isAligned(reinterpret_cast<uptr>(this), alignof(ThisT)));
81 
82     const uptr PageSize = getPageSizeCached();
83     const uptr GroupSize = (1UL << GroupSizeLog);
84     const uptr PagesInGroup = GroupSize / PageSize;
85     const uptr MinSizeClass = getSizeByClassId(1);
86     // When trying to release pages back to the OS, visiting smaller size
87     // classes is expensive. Therefore, we only try to release smaller size
88     // classes when the amount of free blocks goes over a certain threshold (See
89     // the comment in releaseToOSMaybe() for more details). For example, for
90     // size class 32, we only do the release when the size of free blocks is
91     // greater than 97% of pages in a group. However, this may introduce another
92     // issue: the number of free blocks may keep bouncing between 97% and 100%,
93     // which means we may attempt many page releases but only release very few
94     // pages each time (less than 3% of a group). Even though
95     // `ReleaseToOsIntervalMs` slightly reduces the frequency of these calls,
96     // it is better to have another guard to mitigate this issue.
97     //
98     // Here we add another constraint on the minimum size requirement. The
99     // constraint is determined by the size of in-use blocks in the minimal size
100     // class. Take size class 32 as an example,
101     //
102     //   +-     one memory group      -+
103     //   +----------------------+------+
104     //   |  97% of free blocks  |      |
105     //   +----------------------+------+
106     //                           \    /
107     //                      3% in-use blocks
108     //
109     //   * The release size threshold is 97%.
110     //
111     // The 3% of a group is about 7 pages. For two consecutive
112     // releaseToOSMaybe() calls, we require the difference in `PushedBlocks`
113     // to be greater than 7 pages. This mitigates the page-release
114     // thrashing caused by memory usage bouncing around the threshold.
115     // The smallest size class takes the longest time to do a page release, so
116     // we use the size of its in-use blocks as the heuristic.
117     SmallerBlockReleasePageDelta =
118         PagesInGroup * (1 + MinSizeClass / 16U) / 100;
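    // A worked example of the formula above, under the assumed values used by
    // the "about 7 pages" figure (4 KiB pages, GroupSizeLog == 20 so
    // PagesInGroup == 256, and a smallest size class of 32 bytes). The actual
    // values depend on the platform and Config.
    //   SmallerBlockReleasePageDelta = 256 * (1 + 32 / 16) / 100 = 7 pages
    static_assert(256U * (1U + 32U / 16U) / 100U == 7U,
                  "3% of a 1 MiB group is about 7 pages");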
119 
120     u32 Seed;
121     const u64 Time = getMonotonicTimeFast();
122     if (!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed)))
123       Seed = static_cast<u32>(Time ^ (reinterpret_cast<uptr>(&Seed) >> 12));
124 
125     for (uptr I = 0; I < NumClasses; I++)
126       getRegionInfo(I)->RandState = getRandomU32(&Seed);
127 
128     if (Config::getEnableContiguousRegions()) {
129       ReservedMemoryT ReservedMemory = {};
130       // Reserve the space required for the Primary.
131       CHECK(ReservedMemory.create(/*Addr=*/0U, RegionSize * NumClasses,
132                                   "scudo:primary_reserve"));
133       const uptr PrimaryBase = ReservedMemory.getBase();
134 
135       for (uptr I = 0; I < NumClasses; I++) {
136         MemMapT RegionMemMap = ReservedMemory.dispatch(
137             PrimaryBase + (I << RegionSizeLog), RegionSize);
138         RegionInfo *Region = getRegionInfo(I);
139 
140         initRegion(Region, I, RegionMemMap, Config::getEnableRandomOffset());
141       }
142       shuffle(RegionInfoArray, NumClasses, &Seed);
143     }
144 
145     // The binding should be done after region shuffling so that it won't bind
146     // the FLLock from the wrong region.
147     for (uptr I = 0; I < NumClasses; I++)
148       getRegionInfo(I)->FLLockCV.bindTestOnly(getRegionInfo(I)->FLLock);
149 
150     // The default value in the primary config has the higher priority.
151     if (Config::getDefaultReleaseToOsIntervalMs() != INT32_MIN)
152       ReleaseToOsInterval = Config::getDefaultReleaseToOsIntervalMs();
153     setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval));
154   }
155 
156   void unmapTestOnly() {
157     for (uptr I = 0; I < NumClasses; I++) {
158       RegionInfo *Region = getRegionInfo(I);
159       {
160         ScopedLock ML(Region->MMLock);
161         MemMapT MemMap = Region->MemMapInfo.MemMap;
162         if (MemMap.isAllocated())
163           MemMap.unmap();
164       }
165       *Region = {};
166     }
167   }
168 
169   // When all blocks are freed, their total size has to equal `AllocatedUser`.
170   void verifyAllBlocksAreReleasedTestOnly() {
171     // `BatchGroup` and `TransferBatch` also use the blocks from BatchClass.
172     uptr BatchClassUsedInFreeLists = 0;
173     for (uptr I = 0; I < NumClasses; I++) {
174       // We have to count BatchClassUsedInFreeLists in other regions first.
175       if (I == SizeClassMap::BatchClassId)
176         continue;
177       RegionInfo *Region = getRegionInfo(I);
178       ScopedLock ML(Region->MMLock);
179       ScopedLock FL(Region->FLLock);
180       const uptr BlockSize = getSizeByClassId(I);
181       uptr TotalBlocks = 0;
182       for (BatchGroupT &BG : Region->FreeListInfo.BlockList) {
183         // `BG::Batches` are `TransferBatches`. +1 for `BatchGroup`.
184         BatchClassUsedInFreeLists += BG.Batches.size() + 1;
185         for (const auto &It : BG.Batches)
186           TotalBlocks += It.getCount();
187       }
188 
189       DCHECK_EQ(TotalBlocks, Region->MemMapInfo.AllocatedUser / BlockSize);
190       DCHECK_EQ(Region->FreeListInfo.PushedBlocks,
191                 Region->FreeListInfo.PoppedBlocks);
192     }
193 
194     RegionInfo *Region = getRegionInfo(SizeClassMap::BatchClassId);
195     ScopedLock ML(Region->MMLock);
196     ScopedLock FL(Region->FLLock);
197     const uptr BlockSize = getSizeByClassId(SizeClassMap::BatchClassId);
198     uptr TotalBlocks = 0;
199     for (BatchGroupT &BG : Region->FreeListInfo.BlockList) {
200       if (LIKELY(!BG.Batches.empty())) {
201         for (const auto &It : BG.Batches)
202           TotalBlocks += It.getCount();
203       } else {
204         // `BatchGroup` with empty freelist doesn't have `TransferBatch` record
205         // itself.
206         ++TotalBlocks;
207       }
208     }
209     DCHECK_EQ(TotalBlocks + BatchClassUsedInFreeLists,
210               Region->MemMapInfo.AllocatedUser / BlockSize);
211     DCHECK_GE(Region->FreeListInfo.PoppedBlocks,
212               Region->FreeListInfo.PushedBlocks);
213     const uptr BlocksInUse =
214         Region->FreeListInfo.PoppedBlocks - Region->FreeListInfo.PushedBlocks;
215     DCHECK_EQ(BlocksInUse, BatchClassUsedInFreeLists);
216   }
217 
218   u16 popBlocks(CacheT *C, uptr ClassId, CompactPtrT *ToArray,
219                 const u16 MaxBlockCount) {
220     DCHECK_LT(ClassId, NumClasses);
221     RegionInfo *Region = getRegionInfo(ClassId);
222     u16 PopCount = 0;
223 
224     {
225       ScopedLock L(Region->FLLock);
226       PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount);
227       if (PopCount != 0U)
228         return PopCount;
229     }
230 
231     bool ReportRegionExhausted = false;
232 
233     if (conditionVariableEnabled()) {
234       PopCount = popBlocksWithCV(C, ClassId, Region, ToArray, MaxBlockCount,
235                                  ReportRegionExhausted);
236     } else {
237       while (true) {
238         // When two threads compete for `Region->MMLock`, we only want one of
239         // them to call populateFreeListAndPopBlocks(). To avoid both of them
240         // doing that, always check the freelist before mapping new pages.
241         ScopedLock ML(Region->MMLock);
242         {
243           ScopedLock FL(Region->FLLock);
244           PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount);
245           if (PopCount != 0U)
246             return PopCount;
247         }
248 
249         const bool RegionIsExhausted = Region->Exhausted;
250         if (!RegionIsExhausted) {
251           PopCount = populateFreeListAndPopBlocks(C, ClassId, Region, ToArray,
252                                                   MaxBlockCount);
253         }
254         ReportRegionExhausted = !RegionIsExhausted && Region->Exhausted;
255         break;
256       }
257     }
258 
259     if (UNLIKELY(ReportRegionExhausted)) {
260       Printf("Can't populate more pages for size class %zu.\n",
261              getSizeByClassId(ClassId));
262 
263       // Theoretically, BatchClass shouldn't be used up. Abort immediately when
264       // it happens.
265       if (ClassId == SizeClassMap::BatchClassId)
266         reportOutOfBatchClass();
267     }
268 
269     return PopCount;
270   }
271 
272   // Push the array of free blocks to the designated batch group.
273   void pushBlocks(CacheT *C, uptr ClassId, CompactPtrT *Array, u32 Size) {
274     DCHECK_LT(ClassId, NumClasses);
275     DCHECK_GT(Size, 0);
276 
277     RegionInfo *Region = getRegionInfo(ClassId);
278     if (ClassId == SizeClassMap::BatchClassId) {
279       ScopedLock L(Region->FLLock);
280       pushBatchClassBlocks(Region, Array, Size);
281       if (conditionVariableEnabled())
282         Region->FLLockCV.notifyAll(Region->FLLock);
283       return;
284     }
285 
286     // TODO(chiahungduan): Consider not doing grouping if the group size is not
287     // greater than the block size with a certain scale.
288 
289     bool SameGroup = true;
290     if (GroupSizeLog < RegionSizeLog) {
291       // Sort the blocks so that blocks belonging to the same group can be
292       // pushed together.
293       for (u32 I = 1; I < Size; ++I) {
294         if (compactPtrGroup(Array[I - 1]) != compactPtrGroup(Array[I]))
295           SameGroup = false;
296         CompactPtrT Cur = Array[I];
297         u32 J = I;
298         while (J > 0 && compactPtrGroup(Cur) < compactPtrGroup(Array[J - 1])) {
299           Array[J] = Array[J - 1];
300           --J;
301         }
302         Array[J] = Cur;
303       }
304     }
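    // An illustrative example: if the array initially holds blocks whose group
    // bases are, say, [G2, G1, G2, G1], the insertion sort above turns it into
    // [G1, G1, G2, G2] and leaves SameGroup == false, so pushBlocksImpl() below
    // will split it into one per-group segment for each of G1 and G2.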
305 
306     {
307       ScopedLock L(Region->FLLock);
308       pushBlocksImpl(C, ClassId, Region, Array, Size, SameGroup);
309       if (conditionVariableEnabled())
310         Region->FLLockCV.notifyAll(Region->FLLock);
311     }
312   }
313 
314   void disable() NO_THREAD_SAFETY_ANALYSIS {
315     // The BatchClassId must be locked last since other classes can use it.
316     for (sptr I = static_cast<sptr>(NumClasses) - 1; I >= 0; I--) {
317       if (static_cast<uptr>(I) == SizeClassMap::BatchClassId)
318         continue;
319       getRegionInfo(static_cast<uptr>(I))->MMLock.lock();
320       getRegionInfo(static_cast<uptr>(I))->FLLock.lock();
321     }
322     getRegionInfo(SizeClassMap::BatchClassId)->MMLock.lock();
323     getRegionInfo(SizeClassMap::BatchClassId)->FLLock.lock();
324   }
325 
326   void enable() NO_THREAD_SAFETY_ANALYSIS {
327     getRegionInfo(SizeClassMap::BatchClassId)->FLLock.unlock();
328     getRegionInfo(SizeClassMap::BatchClassId)->MMLock.unlock();
329     for (uptr I = 0; I < NumClasses; I++) {
330       if (I == SizeClassMap::BatchClassId)
331         continue;
332       getRegionInfo(I)->FLLock.unlock();
333       getRegionInfo(I)->MMLock.unlock();
334     }
335   }
336 
337   template <typename F> void iterateOverBlocks(F Callback) {
338     for (uptr I = 0; I < NumClasses; I++) {
339       if (I == SizeClassMap::BatchClassId)
340         continue;
341       RegionInfo *Region = getRegionInfo(I);
342       // TODO: The call of `iterateOverBlocks` requires disabling
343       // SizeClassAllocator64. We may consider locking each region on demand
344       // only.
345       Region->FLLock.assertHeld();
346       Region->MMLock.assertHeld();
347       const uptr BlockSize = getSizeByClassId(I);
348       const uptr From = Region->RegionBeg;
349       const uptr To = From + Region->MemMapInfo.AllocatedUser;
350       for (uptr Block = From; Block < To; Block += BlockSize)
351         Callback(Block);
352     }
353   }
354 
355   void getStats(ScopedString *Str) {
356     // TODO(kostyak): get the RSS per region.
357     uptr TotalMapped = 0;
358     uptr PoppedBlocks = 0;
359     uptr PushedBlocks = 0;
360     for (uptr I = 0; I < NumClasses; I++) {
361       RegionInfo *Region = getRegionInfo(I);
362       {
363         ScopedLock L(Region->MMLock);
364         TotalMapped += Region->MemMapInfo.MappedUser;
365       }
366       {
367         ScopedLock L(Region->FLLock);
368         PoppedBlocks += Region->FreeListInfo.PoppedBlocks;
369         PushedBlocks += Region->FreeListInfo.PushedBlocks;
370       }
371     }
372     const s32 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs);
373     Str->append("Stats: SizeClassAllocator64: %zuM mapped (%uM rss) in %zu "
374                 "allocations; remains %zu; ReleaseToOsIntervalMs = %d\n",
375                 TotalMapped >> 20, 0U, PoppedBlocks,
376                 PoppedBlocks - PushedBlocks, IntervalMs >= 0 ? IntervalMs : -1);
377 
378     for (uptr I = 0; I < NumClasses; I++) {
379       RegionInfo *Region = getRegionInfo(I);
380       ScopedLock L1(Region->MMLock);
381       ScopedLock L2(Region->FLLock);
382       getStats(Str, I, Region);
383     }
384   }
385 
386   void getFragmentationInfo(ScopedString *Str) {
387     Str->append(
388         "Fragmentation Stats: SizeClassAllocator64: page size = %zu bytes\n",
389         getPageSizeCached());
390 
391     for (uptr I = 1; I < NumClasses; I++) {
392       RegionInfo *Region = getRegionInfo(I);
393       ScopedLock L(Region->MMLock);
394       getRegionFragmentationInfo(Region, I, Str);
395     }
396   }
397 
398   void getMemoryGroupFragmentationInfo(ScopedString *Str) {
399     Str->append(
400         "Fragmentation Stats: SizeClassAllocator64: page size = %zu bytes\n",
401         getPageSizeCached());
402 
403     for (uptr I = 1; I < NumClasses; I++) {
404       RegionInfo *Region = getRegionInfo(I);
405       ScopedLock L(Region->MMLock);
406       getMemoryGroupFragmentationInfoInRegion(Region, I, Str);
407     }
408   }
409 
410   bool setOption(Option O, sptr Value) {
411     if (O == Option::ReleaseInterval) {
412       const s32 Interval = Max(
413           Min(static_cast<s32>(Value), Config::getMaxReleaseToOsIntervalMs()),
414           Config::getMinReleaseToOsIntervalMs());
415       atomic_store_relaxed(&ReleaseToOsIntervalMs, Interval);
416       return true;
417     }
418     // Not supported by the Primary, but not an error either.
419     return true;
420   }
421 
422   uptr tryReleaseToOS(uptr ClassId, ReleaseToOS ReleaseType) {
423     RegionInfo *Region = getRegionInfo(ClassId);
424     // Note that tryLock() may fail spuriously. Given that this should happen
425     // rarely and that skipping a page release is fine, we don't take extra
426     // measures to ensure a page release is done.
427     if (Region->MMLock.tryLock()) {
428       uptr BytesReleased = releaseToOSMaybe(Region, ClassId, ReleaseType);
429       Region->MMLock.unlock();
430       return BytesReleased;
431     }
432     return 0;
433   }
434 
435   uptr releaseToOS(ReleaseToOS ReleaseType) {
436     uptr TotalReleasedBytes = 0;
437     for (uptr I = 0; I < NumClasses; I++) {
438       if (I == SizeClassMap::BatchClassId)
439         continue;
440       RegionInfo *Region = getRegionInfo(I);
441       ScopedLock L(Region->MMLock);
442       TotalReleasedBytes += releaseToOSMaybe(Region, I, ReleaseType);
443     }
444     return TotalReleasedBytes;
445   }
446 
447   const char *getRegionInfoArrayAddress() const {
448     return reinterpret_cast<const char *>(RegionInfoArray);
449   }
450 
451   static uptr getRegionInfoArraySize() { return sizeof(RegionInfoArray); }
452 
453   uptr getCompactPtrBaseByClassId(uptr ClassId) {
454     return getRegionInfo(ClassId)->RegionBeg;
455   }
456 
457   CompactPtrT compactPtr(uptr ClassId, uptr Ptr) {
458     DCHECK_LE(ClassId, SizeClassMap::LargestClassId);
459     return compactPtrInternal(getCompactPtrBaseByClassId(ClassId), Ptr);
460   }
461 
462   void *decompactPtr(uptr ClassId, CompactPtrT CompactPtr) {
463     DCHECK_LE(ClassId, SizeClassMap::LargestClassId);
464     return reinterpret_cast<void *>(
465         decompactPtrInternal(getCompactPtrBaseByClassId(ClassId), CompactPtr));
466   }
467 
468   static BlockInfo findNearestBlock(const char *RegionInfoData,
469                                     uptr Ptr) NO_THREAD_SAFETY_ANALYSIS {
470     const RegionInfo *RegionInfoArray =
471         reinterpret_cast<const RegionInfo *>(RegionInfoData);
472 
473     uptr ClassId;
474     uptr MinDistance = -1UL;
475     for (uptr I = 0; I != NumClasses; ++I) {
476       if (I == SizeClassMap::BatchClassId)
477         continue;
478       uptr Begin = RegionInfoArray[I].RegionBeg;
479       // TODO(chiahungduan): In fact, we need to lock RegionInfo::MMLock.
480       // However, RegionInfoData is passed with a const qualifier and locking the
481       // mutex requires modifying RegionInfoData, which means we would need to
482       // remove the const qualifier. This may lead to another undefined behavior
483       // (the first one is accessing `AllocatedUser` without locking). It's better
484       // to pass `RegionInfoData` as `void *` so we can lock the mutex properly.
485       uptr End = Begin + RegionInfoArray[I].MemMapInfo.AllocatedUser;
486       if (Begin > End || End - Begin < SizeClassMap::getSizeByClassId(I))
487         continue;
488       uptr RegionDistance;
489       if (Begin <= Ptr) {
490         if (Ptr < End)
491           RegionDistance = 0;
492         else
493           RegionDistance = Ptr - End;
494       } else {
495         RegionDistance = Begin - Ptr;
496       }
497 
498       if (RegionDistance < MinDistance) {
499         MinDistance = RegionDistance;
500         ClassId = I;
501       }
502     }
503 
504     BlockInfo B = {};
505     if (MinDistance <= 8192) {
506       B.RegionBegin = RegionInfoArray[ClassId].RegionBeg;
507       B.RegionEnd =
508           B.RegionBegin + RegionInfoArray[ClassId].MemMapInfo.AllocatedUser;
509       B.BlockSize = SizeClassMap::getSizeByClassId(ClassId);
510       B.BlockBegin =
511           B.RegionBegin + uptr(sptr(Ptr - B.RegionBegin) / sptr(B.BlockSize) *
512                                sptr(B.BlockSize));
513       while (B.BlockBegin < B.RegionBegin)
514         B.BlockBegin += B.BlockSize;
515       while (B.RegionEnd < B.BlockBegin + B.BlockSize)
516         B.BlockBegin -= B.BlockSize;
517     }
518     return B;
519   }
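  // A worked example of the rounding above, with assumed values
  // (RegionBegin == 0x1000, BlockSize == 48, Ptr == 0x10A0); these are purely
  // illustrative and not tied to any real size class.
  //   offset     = 0x10A0 - 0x1000 = 160
  //   BlockBegin = 0x1000 + (160 / 48) * 48 = 0x1000 + 144 = 0x1090
  static_assert((0x10A0U - 0x1000U) / 48U * 48U + 0x1000U == 0x1090U,
                "Ptr rounds down to the containing block's begin address");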
520 
521   AtomicOptions Options;
522 
523 private:
524   static const uptr RegionSize = 1UL << RegionSizeLog;
525   static const uptr NumClasses = SizeClassMap::NumClasses;
526 
527   static const uptr MapSizeIncrement = Config::getMapSizeIncrement();
528   // Fill at most this number of batches from the newly mapped memory.
529   static const u32 MaxNumBatches = SCUDO_ANDROID ? 4U : 8U;
530 
531   struct ReleaseToOsInfo {
532     uptr BytesInFreeListAtLastCheckpoint;
533     uptr RangesReleased;
534     uptr LastReleasedBytes;
535     // The minimum size of pushed blocks to trigger page release.
536     uptr TryReleaseThreshold;
537     // The number of bytes not triggering `releaseToOSMaybe()` because of
538     // the length of release interval.
539     uptr PendingPushedBytesDelta;
540     u64 LastReleaseAtNs;
541   };
542 
543   struct BlocksInfo {
544     SinglyLinkedList<BatchGroupT> BlockList = {};
545     uptr PoppedBlocks = 0;
546     uptr PushedBlocks = 0;
547   };
548 
549   struct PagesInfo {
550     MemMapT MemMap = {};
551     // Bytes mapped for user memory.
552     uptr MappedUser = 0;
553     // Bytes allocated for user memory.
554     uptr AllocatedUser = 0;
555   };
556 
557   struct UnpaddedRegionInfo {
558     // Mutex for operations on freelist
559     HybridMutex FLLock;
560     ConditionVariableT FLLockCV GUARDED_BY(FLLock);
561     // Mutex for memmap operations
562     HybridMutex MMLock ACQUIRED_BEFORE(FLLock);
563     // `RegionBeg` is initialized before thread creation and won't be changed.
564     uptr RegionBeg = 0;
565     u32 RandState GUARDED_BY(MMLock) = 0;
566     BlocksInfo FreeListInfo GUARDED_BY(FLLock);
567     PagesInfo MemMapInfo GUARDED_BY(MMLock);
568     ReleaseToOsInfo ReleaseInfo GUARDED_BY(MMLock) = {};
569     bool Exhausted GUARDED_BY(MMLock) = false;
570     bool isPopulatingFreeList GUARDED_BY(FLLock) = false;
571   };
572   struct RegionInfo : UnpaddedRegionInfo {
573     char Padding[SCUDO_CACHE_LINE_SIZE -
574                  (sizeof(UnpaddedRegionInfo) % SCUDO_CACHE_LINE_SIZE)] = {};
575   };
576   static_assert(sizeof(RegionInfo) % SCUDO_CACHE_LINE_SIZE == 0, "");
577 
578   RegionInfo *getRegionInfo(uptr ClassId) {
579     DCHECK_LT(ClassId, NumClasses);
580     return &RegionInfoArray[ClassId];
581   }
582 
583   uptr getRegionBaseByClassId(uptr ClassId) {
584     RegionInfo *Region = getRegionInfo(ClassId);
585     Region->MMLock.assertHeld();
586 
587     if (!Config::getEnableContiguousRegions() &&
588         !Region->MemMapInfo.MemMap.isAllocated()) {
589       return 0U;
590     }
591     return Region->MemMapInfo.MemMap.getBase();
592   }
593 
594   static CompactPtrT compactPtrInternal(uptr Base, uptr Ptr) {
595     return static_cast<CompactPtrT>((Ptr - Base) >> CompactPtrScale);
596   }
597 
598   static uptr decompactPtrInternal(uptr Base, CompactPtrT CompactPtr) {
599     return Base + (static_cast<uptr>(CompactPtr) << CompactPtrScale);
600   }
601 
602   static uptr compactPtrGroup(CompactPtrT CompactPtr) {
603     const uptr Mask = (static_cast<uptr>(1) << GroupScale) - 1;
604     return static_cast<uptr>(CompactPtr) & ~Mask;
605   }
606   static uptr decompactGroupBase(uptr Base, uptr CompactPtrGroupBase) {
607     DCHECK_EQ(CompactPtrGroupBase % (static_cast<uptr>(1) << (GroupScale)), 0U);
608     return Base + (CompactPtrGroupBase << CompactPtrScale);
609   }
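  // A worked example of the compact-pointer arithmetic above, under assumed
  // values (CompactPtrScale == 4 and GroupSizeLog == 18, so GroupScale == 14);
  // the real values come from the Config.
  //   Block at Base + 0x50000:
  //     compactPtrInternal -> 0x50000 >> 4         = 0x5000
  //     compactPtrGroup    -> 0x5000 & ~0x3FFF     = 0x4000
  //     decompactGroupBase -> Base + (0x4000 << 4) = Base + 0x40000
  static_assert((0x50000U >> 4) == 0x5000U, "compact the pointer");
  static_assert((0x5000U & ~((1U << 14) - 1)) == 0x4000U, "mask to group base");
  static_assert((0x4000U << 4) == 0x40000U, "decompact the group base");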
610 
611   ALWAYS_INLINE static bool isSmallBlock(uptr BlockSize) {
612     const uptr PageSize = getPageSizeCached();
613     return BlockSize < PageSize / 16U;
614   }
615 
616   ALWAYS_INLINE uptr getMinReleaseAttemptSize(uptr BlockSize) {
617     return roundUp(BlockSize, getPageSizeCached());
618   }
619 
620   ALWAYS_INLINE void initRegion(RegionInfo *Region, uptr ClassId,
621                                 MemMapT MemMap, bool EnableRandomOffset)
622       REQUIRES(Region->MMLock) {
623     DCHECK(!Region->MemMapInfo.MemMap.isAllocated());
624     DCHECK(MemMap.isAllocated());
625 
626     const uptr PageSize = getPageSizeCached();
627 
628     Region->MemMapInfo.MemMap = MemMap;
629 
630     Region->RegionBeg = MemMap.getBase();
631     if (EnableRandomOffset) {
632       Region->RegionBeg +=
633           (getRandomModN(&Region->RandState, 16) + 1) * PageSize;
634     }
635 
636     const uptr BlockSize = getSizeByClassId(ClassId);
637     // Releasing small blocks is expensive, set a higher threshold to avoid
638     // frequent page releases.
639     if (isSmallBlock(BlockSize)) {
640       Region->ReleaseInfo.TryReleaseThreshold =
641           PageSize * SmallerBlockReleasePageDelta;
642     } else {
643       Region->ReleaseInfo.TryReleaseThreshold =
644           getMinReleaseAttemptSize(BlockSize);
645     }
646   }
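  // An illustrative example of the two thresholds set above, assuming 4 KiB
  // pages and SmallerBlockReleasePageDelta == 7 (the value from the example in
  // init()); actual numbers depend on the configuration.
  //   32-byte class   (small, 32 < 4096 / 16): threshold = 4096 * 7 = 28672
  //   1024-byte class (not small)            : threshold = roundUp(1024, 4096)
  static_assert(4096U * 7U == 28672U, "small-block threshold, 7 pages");
  static_assert((1024U + 4096U - 1U) / 4096U * 4096U == 4096U,
                "larger blocks use at least one page as the threshold");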
647 
648   void pushBatchClassBlocks(RegionInfo *Region, CompactPtrT *Array, u32 Size)
649       REQUIRES(Region->FLLock) {
650     DCHECK_EQ(Region, getRegionInfo(SizeClassMap::BatchClassId));
651 
652     // Free blocks are recorded by TransferBatches in the freelist for all
653     // size classes. In addition, TransferBatches are allocated from BatchClassId.
654     // In order not to use an additional block to record the free blocks in
655     // BatchClassId, they are self-contained, i.e., a TransferBatch records its
656     // own block address. See the figure below:
657     //
658     // TransferBatch at 0xABCD
659     // +----------------------------+
660     // | Free blocks' addr          |
661     // | +------+------+------+     |
662     // | |0xABCD|...   |...   |     |
663     // | +------+------+------+     |
664     // +----------------------------+
665     //
666     // When we allocate all the free blocks in the TransferBatch, the block used
667     // by TransferBatch is also free for use. We don't need to recycle the
668     // TransferBatch. Note that the correctness is maintained by the invariant,
669     //
670     //   Each popBlocks() request returns the entire TransferBatch. Returning
671     //   part of the blocks in a TransferBatch is invalid.
672     //
673     // This ensures that a TransferBatch won't leak its own address while it's
674     // still holding other valid data.
675     //
676     // Besides, BatchGroup is also allocated from BatchClassId and has its
677     // address recorded in the TransferBatch too. To maintain the correctness,
678     //
679     //   The address of BatchGroup is always recorded in the last TransferBatch
680     //   in the freelist (which also implies that the freelist should only be
681     //   updated with push_front). Once the last TransferBatch is popped,
682     //   the block used by BatchGroup is also free for use.
683     //
684     // With this approach, the blocks used by BatchGroup and TransferBatch are
685     // reusable and don't need additional space for them.
686 
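    // An illustrative example (assumed compact addresses): pushing three
    // BatchClass blocks 0x1000, 0x2000 and 0x3000 onto an empty freelist makes
    // 0x3000 the BatchGroup and 0x2000 the first TransferBatch, and that
    // TransferBatch then records 0x2000 (itself), 0x3000 (the BatchGroup) and
    // 0x1000, so no extra memory is needed for the metadata.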
687     Region->FreeListInfo.PushedBlocks += Size;
688     BatchGroupT *BG = Region->FreeListInfo.BlockList.front();
689 
690     if (BG == nullptr) {
691       // Construct `BatchGroup` on the last element.
692       BG = reinterpret_cast<BatchGroupT *>(
693           decompactPtr(SizeClassMap::BatchClassId, Array[Size - 1]));
694       --Size;
695       BG->Batches.clear();
696       // BatchClass hasn't enabled memory group. Use `0` to indicate there's no
697       // memory group here.
698       BG->CompactPtrGroupBase = 0;
699       BG->BytesInBGAtLastCheckpoint = 0;
700       BG->MaxCachedPerBatch =
701           CacheT::getMaxCached(getSizeByClassId(SizeClassMap::BatchClassId));
702 
703       Region->FreeListInfo.BlockList.push_front(BG);
704     }
705 
706     if (UNLIKELY(Size == 0))
707       return;
708 
709     // This happens in two cases:
710     //   1. We just allocated a new `BatchGroup`.
711     //   2. Only one block is pushed when the freelist is empty.
712     if (BG->Batches.empty()) {
713       // Construct the `TransferBatch` on the last element.
714       TransferBatchT *TB = reinterpret_cast<TransferBatchT *>(
715           decompactPtr(SizeClassMap::BatchClassId, Array[Size - 1]));
716       TB->clear();
717       // As mentioned above, addresses of `TransferBatch` and `BatchGroup` are
718       // recorded in the TransferBatch.
719       TB->add(Array[Size - 1]);
720       TB->add(
721           compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(BG)));
722       --Size;
723       BG->Batches.push_front(TB);
724     }
725 
726     TransferBatchT *CurBatch = BG->Batches.front();
727     DCHECK_NE(CurBatch, nullptr);
728 
729     for (u32 I = 0; I < Size;) {
730       u16 UnusedSlots =
731           static_cast<u16>(BG->MaxCachedPerBatch - CurBatch->getCount());
732       if (UnusedSlots == 0) {
733         CurBatch = reinterpret_cast<TransferBatchT *>(
734             decompactPtr(SizeClassMap::BatchClassId, Array[I]));
735         CurBatch->clear();
736         // Self-contained
737         CurBatch->add(Array[I]);
738         ++I;
739         // TODO(chiahungduan): Avoid the use of push_back() in `Batches` of
740         // BatchClassId.
741         BG->Batches.push_front(CurBatch);
742         UnusedSlots = static_cast<u16>(BG->MaxCachedPerBatch - 1);
743       }
744       // `UnusedSlots` is u16 so the result will also fit in u16.
745       const u16 AppendSize = static_cast<u16>(Min<u32>(UnusedSlots, Size - I));
746       CurBatch->appendFromArray(&Array[I], AppendSize);
747       I += AppendSize;
748     }
749   }
750 
751   // Push the blocks to their batch group. The layout will be like,
752   //
753   // FreeListInfo.BlockList - > BG -> BG -> BG
754   //                            |     |     |
755   //                            v     v     v
756   //                            TB    TB    TB
757   //                            |
758   //                            v
759   //                            TB
760   //
761   // Each BatchGroup (BG) is associated with a unique group id and its free
762   // blocks are managed by a list of TransferBatches (TB). To reduce the cost
763   // of inserting blocks, the BGs are sorted and the input `Array` is expected
764   // to be sorted as well, so that the sorted property is cheap to maintain.
765   // Use `SameGroup=true` to indicate that all blocks in the array are from the
766   // same group; then we will skip checking the group id of each block.
767   void pushBlocksImpl(CacheT *C, uptr ClassId, RegionInfo *Region,
768                       CompactPtrT *Array, u32 Size, bool SameGroup = false)
769       REQUIRES(Region->FLLock) {
770     DCHECK_NE(ClassId, SizeClassMap::BatchClassId);
771     DCHECK_GT(Size, 0U);
772 
773     auto CreateGroup = [&](uptr CompactPtrGroupBase) {
774       BatchGroupT *BG =
775           reinterpret_cast<BatchGroupT *>(C->getBatchClassBlock());
776       BG->Batches.clear();
777       TransferBatchT *TB =
778           reinterpret_cast<TransferBatchT *>(C->getBatchClassBlock());
779       TB->clear();
780 
781       BG->CompactPtrGroupBase = CompactPtrGroupBase;
782       BG->Batches.push_front(TB);
783       BG->BytesInBGAtLastCheckpoint = 0;
784       BG->MaxCachedPerBatch = TransferBatchT::MaxNumCached;
785 
786       return BG;
787     };
788 
789     auto InsertBlocks = [&](BatchGroupT *BG, CompactPtrT *Array, u32 Size) {
790       SinglyLinkedList<TransferBatchT> &Batches = BG->Batches;
791       TransferBatchT *CurBatch = Batches.front();
792       DCHECK_NE(CurBatch, nullptr);
793 
794       for (u32 I = 0; I < Size;) {
795         DCHECK_GE(BG->MaxCachedPerBatch, CurBatch->getCount());
796         u16 UnusedSlots =
797             static_cast<u16>(BG->MaxCachedPerBatch - CurBatch->getCount());
798         if (UnusedSlots == 0) {
799           CurBatch =
800               reinterpret_cast<TransferBatchT *>(C->getBatchClassBlock());
801           CurBatch->clear();
802           Batches.push_front(CurBatch);
803           UnusedSlots = BG->MaxCachedPerBatch;
804         }
805         // `UnusedSlots` is u16 so the result will also fit in u16.
806         u16 AppendSize = static_cast<u16>(Min<u32>(UnusedSlots, Size - I));
807         CurBatch->appendFromArray(&Array[I], AppendSize);
808         I += AppendSize;
809       }
810     };
811 
812     Region->FreeListInfo.PushedBlocks += Size;
813     BatchGroupT *Cur = Region->FreeListInfo.BlockList.front();
814 
815     // In the following, `Cur` always points to the BatchGroup for blocks that
816     // will be pushed next. `Prev` is the element right before `Cur`.
817     BatchGroupT *Prev = nullptr;
818 
819     while (Cur != nullptr &&
820            compactPtrGroup(Array[0]) > Cur->CompactPtrGroupBase) {
821       Prev = Cur;
822       Cur = Cur->Next;
823     }
824 
825     if (Cur == nullptr ||
826         compactPtrGroup(Array[0]) != Cur->CompactPtrGroupBase) {
827       Cur = CreateGroup(compactPtrGroup(Array[0]));
828       if (Prev == nullptr)
829         Region->FreeListInfo.BlockList.push_front(Cur);
830       else
831         Region->FreeListInfo.BlockList.insert(Prev, Cur);
832     }
833 
834     // All the blocks are from the same group, just push without checking group
835     // id.
836     if (SameGroup) {
837       for (u32 I = 0; I < Size; ++I)
838         DCHECK_EQ(compactPtrGroup(Array[I]), Cur->CompactPtrGroupBase);
839 
840       InsertBlocks(Cur, Array, Size);
841       return;
842     }
843 
844     // The blocks are sorted by group id. Determine each group's segment and
845     // push its blocks together.
846     u32 Count = 1;
847     for (u32 I = 1; I < Size; ++I) {
848       if (compactPtrGroup(Array[I - 1]) != compactPtrGroup(Array[I])) {
849         DCHECK_EQ(compactPtrGroup(Array[I - 1]), Cur->CompactPtrGroupBase);
850         InsertBlocks(Cur, Array + I - Count, Count);
851 
852         while (Cur != nullptr &&
853                compactPtrGroup(Array[I]) > Cur->CompactPtrGroupBase) {
854           Prev = Cur;
855           Cur = Cur->Next;
856         }
857 
858         if (Cur == nullptr ||
859             compactPtrGroup(Array[I]) != Cur->CompactPtrGroupBase) {
860           Cur = CreateGroup(compactPtrGroup(Array[I]));
861           DCHECK_NE(Prev, nullptr);
862           Region->FreeListInfo.BlockList.insert(Prev, Cur);
863         }
864 
865         Count = 1;
866       } else {
867         ++Count;
868       }
869     }
870 
871     InsertBlocks(Cur, Array + Size - Count, Count);
872   }
873 
874   u16 popBlocksWithCV(CacheT *C, uptr ClassId, RegionInfo *Region,
875                       CompactPtrT *ToArray, const u16 MaxBlockCount,
876                       bool &ReportRegionExhausted) {
877     u16 PopCount = 0;
878 
879     while (true) {
880       // We only expect one thread to refill the freelist; the other threads
881       // will be waiting for either the completion of
882       // `populateFreeListAndPopBlocks()` or a `pushBlocks()` call from another
883       // thread.
884       bool PopulateFreeList = false;
885       {
886         ScopedLock FL(Region->FLLock);
887         if (!Region->isPopulatingFreeList) {
888           Region->isPopulatingFreeList = true;
889           PopulateFreeList = true;
890         }
891       }
892 
893       if (PopulateFreeList) {
894         ScopedLock ML(Region->MMLock);
895 
896         const bool RegionIsExhausted = Region->Exhausted;
897         if (!RegionIsExhausted) {
898           PopCount = populateFreeListAndPopBlocks(C, ClassId, Region, ToArray,
899                                                   MaxBlockCount);
900         }
901         ReportRegionExhausted = !RegionIsExhausted && Region->Exhausted;
902 
903         {
904           // Before reacquiring the `FLLock`, the freelist may be used up again
905           // and some threads may be waiting for the current thread to refill
906           // the freelist. It's important to set
907           // `Region->isPopulatingFreeList` to false so the threads about to
908           // sleep will notice the status change.
909           ScopedLock FL(Region->FLLock);
910           Region->isPopulatingFreeList = false;
911           Region->FLLockCV.notifyAll(Region->FLLock);
912         }
913 
914         break;
915       }
916 
917       // At this point, two preconditions must be met before waiting:
918       //   1. The freelist is empty.
919       //   2. Region->isPopulatingFreeList == true, i.e., someone is still doing
920       //   `populateFreeListAndPopBlocks()`.
921       //
922       // Note that there is a chance that the freelist is empty but
923       // Region->isPopulatingFreeList == false, because all the newly populated
924       // blocks were used up right after the refill. Therefore, we have to
925       // check if someone is still populating the freelist.
926       ScopedLock FL(Region->FLLock);
927       PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount);
928       if (PopCount != 0U)
929         break;
930 
931       if (!Region->isPopulatingFreeList)
932         continue;
933 
934       // Now the freelist is empty and someone is refilling it. We will wait
935       // until the freelist is refilled or someone finishes
936       // `populateFreeListAndPopBlocks()`. The refill can be done by
937       // `populateFreeListAndPopBlocks()`, `pushBlocks()`,
938       // `pushBatchClassBlocks()` and `mergeGroupsToReleaseBack()`.
939       Region->FLLockCV.wait(Region->FLLock);
940 
941       PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount);
942       if (PopCount != 0U)
943         break;
944     }
945 
946     return PopCount;
947   }
948 
949   u16 popBlocksImpl(CacheT *C, uptr ClassId, RegionInfo *Region,
950                     CompactPtrT *ToArray, const u16 MaxBlockCount)
951       REQUIRES(Region->FLLock) {
952     if (Region->FreeListInfo.BlockList.empty())
953       return 0U;
954 
955     SinglyLinkedList<TransferBatchT> &Batches =
956         Region->FreeListInfo.BlockList.front()->Batches;
957 
958     if (Batches.empty()) {
959       DCHECK_EQ(ClassId, SizeClassMap::BatchClassId);
960       BatchGroupT *BG = Region->FreeListInfo.BlockList.front();
961       Region->FreeListInfo.BlockList.pop_front();
962 
963       // The block used by `BatchGroup` is from BatchClassId. Turn the block
964       // into a `TransferBatch` with a single block.
965       TransferBatchT *TB = reinterpret_cast<TransferBatchT *>(BG);
966       ToArray[0] =
967           compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(TB));
968       Region->FreeListInfo.PoppedBlocks += 1;
969       return 1U;
970     }
971 
972     // So far, instead of always filling blocks to `MaxBlockCount`, we only
973     // examine a single `TransferBatch` to minimize the time spent in the primary
974     // allocator. Besides, the sizes of `TransferBatch` and
975     // `CacheT::getMaxCached()` may also impact the time spent on accessing the
976     // primary allocator.
977     // TODO(chiahungduan): Evaluate if we want to always prepare `MaxBlockCount`
978     // blocks and/or adjust the size of `TransferBatch` according to
979     // `CacheT::getMaxCached()`.
980     TransferBatchT *B = Batches.front();
981     DCHECK_NE(B, nullptr);
982     DCHECK_GT(B->getCount(), 0U);
983 
984     // BatchClassId should always take all blocks in the TransferBatch. Read the
985     // comment in `pushBatchClassBlocks()` for more details.
986     const u16 PopCount = ClassId == SizeClassMap::BatchClassId
987                              ? B->getCount()
988                              : Min(MaxBlockCount, B->getCount());
989     B->moveNToArray(ToArray, PopCount);
990 
991     // TODO(chiahungduan): The deallocation of unused BatchClassId blocks can be
992     // done without holding `FLLock`.
993     if (B->empty()) {
994       Batches.pop_front();
995       // `TransferBatch` of BatchClassId is self-contained, no need to
996       // deallocate. Read the comment in `pushBatchClassBlocks()` for more
997       // details.
998       if (ClassId != SizeClassMap::BatchClassId)
999         C->deallocate(SizeClassMap::BatchClassId, B);
1000 
1001       if (Batches.empty()) {
1002         BatchGroupT *BG = Region->FreeListInfo.BlockList.front();
1003         Region->FreeListInfo.BlockList.pop_front();
1004 
1005         // We don't keep a BatchGroup with zero blocks, to avoid empty-checking
1006         // while allocating. Note that the block used to construct the BatchGroup
1007         // is recorded as a free block in the last element of BatchGroup::Batches,
1008         // which means that once we pop the last TransferBatch, the block is
1009         // implicitly deallocated.
1010         if (ClassId != SizeClassMap::BatchClassId)
1011           C->deallocate(SizeClassMap::BatchClassId, BG);
1012       }
1013     }
1014 
1015     Region->FreeListInfo.PoppedBlocks += PopCount;
1016 
1017     return PopCount;
1018   }
1019 
1020   NOINLINE u16 populateFreeListAndPopBlocks(CacheT *C, uptr ClassId,
1021                                             RegionInfo *Region,
1022                                             CompactPtrT *ToArray,
1023                                             const u16 MaxBlockCount)
1024       REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) {
1025     if (!Config::getEnableContiguousRegions() &&
1026         !Region->MemMapInfo.MemMap.isAllocated()) {
1027       ReservedMemoryT ReservedMemory;
1028       if (UNLIKELY(!ReservedMemory.create(/*Addr=*/0U, RegionSize,
1029                                           "scudo:primary_reserve",
1030                                           MAP_ALLOWNOMEM))) {
1031         Printf("Can't reserve pages for size class %zu.\n",
1032                getSizeByClassId(ClassId));
1033         return 0U;
1034       }
1035       initRegion(Region, ClassId,
1036                  ReservedMemory.dispatch(ReservedMemory.getBase(),
1037                                          ReservedMemory.getCapacity()),
1038                  /*EnableRandomOffset=*/false);
1039     }
1040 
1041     DCHECK(Region->MemMapInfo.MemMap.isAllocated());
1042     const uptr Size = getSizeByClassId(ClassId);
1043     const u16 MaxCount = CacheT::getMaxCached(Size);
1044     const uptr RegionBeg = Region->RegionBeg;
1045     const uptr MappedUser = Region->MemMapInfo.MappedUser;
1046     const uptr TotalUserBytes =
1047         Region->MemMapInfo.AllocatedUser + MaxCount * Size;
1048     // Map more space for blocks, if necessary.
1049     if (TotalUserBytes > MappedUser) {
1050       // Do the mmap for the user memory.
1051       const uptr MapSize =
1052           roundUp(TotalUserBytes - MappedUser, MapSizeIncrement);
1053       const uptr RegionBase = RegionBeg - getRegionBaseByClassId(ClassId);
1054       if (UNLIKELY(RegionBase + MappedUser + MapSize > RegionSize)) {
1055         Region->Exhausted = true;
1056         return 0U;
1057       }
1058 
1059       if (UNLIKELY(!Region->MemMapInfo.MemMap.remap(
1060               RegionBeg + MappedUser, MapSize, "scudo:primary",
1061               MAP_ALLOWNOMEM | MAP_RESIZABLE |
1062                   (useMemoryTagging<Config>(Options.load()) ? MAP_MEMTAG
1063                                                             : 0)))) {
1064         return 0U;
1065       }
1066       Region->MemMapInfo.MappedUser += MapSize;
1067       C->getStats().add(StatMapped, MapSize);
1068     }
1069 
1070     const u32 NumberOfBlocks =
1071         Min(MaxNumBatches * MaxCount,
1072             static_cast<u32>((Region->MemMapInfo.MappedUser -
1073                               Region->MemMapInfo.AllocatedUser) /
1074                              Size));
1075     DCHECK_GT(NumberOfBlocks, 0);
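    // A worked example with assumed values (MaxNumBatches == 8, MaxCount == 14,
    // a 1024-byte class and 256 KiB of mapped-but-unallocated space):
    //   NumberOfBlocks = min(8 * 14, 262144 / 1024) = min(112, 256) = 112
    static_assert(8U * 14U == 112U && 262144U / 1024U == 256U,
                  "illustrative arithmetic only");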
1076 
1077     constexpr u32 ShuffleArraySize =
1078         MaxNumBatches * TransferBatchT::MaxNumCached;
1079     CompactPtrT ShuffleArray[ShuffleArraySize];
1080     DCHECK_LE(NumberOfBlocks, ShuffleArraySize);
1081 
1082     const uptr CompactPtrBase = getCompactPtrBaseByClassId(ClassId);
1083     uptr P = RegionBeg + Region->MemMapInfo.AllocatedUser;
1084     for (u32 I = 0; I < NumberOfBlocks; I++, P += Size)
1085       ShuffleArray[I] = compactPtrInternal(CompactPtrBase, P);
1086 
1087     ScopedLock L(Region->FLLock);
1088 
1089     if (ClassId != SizeClassMap::BatchClassId) {
1090       u32 N = 1;
1091       uptr CurGroup = compactPtrGroup(ShuffleArray[0]);
1092       for (u32 I = 1; I < NumberOfBlocks; I++) {
1093         if (UNLIKELY(compactPtrGroup(ShuffleArray[I]) != CurGroup)) {
1094           shuffle(ShuffleArray + I - N, N, &Region->RandState);
1095           pushBlocksImpl(C, ClassId, Region, ShuffleArray + I - N, N,
1096                          /*SameGroup=*/true);
1097           N = 1;
1098           CurGroup = compactPtrGroup(ShuffleArray[I]);
1099         } else {
1100           ++N;
1101         }
1102       }
1103 
1104       shuffle(ShuffleArray + NumberOfBlocks - N, N, &Region->RandState);
1105       pushBlocksImpl(C, ClassId, Region, &ShuffleArray[NumberOfBlocks - N], N,
1106                      /*SameGroup=*/true);
1107     } else {
1108       pushBatchClassBlocks(Region, ShuffleArray, NumberOfBlocks);
1109     }
1110 
1111     const u16 PopCount =
1112         popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount);
1113     DCHECK_NE(PopCount, 0U);
1114 
1115     // Note that `PushedBlocks` and `PoppedBlocks` are supposed to only record
1116     // the requests from `pushBlocks()` and `popBlocks()`, which are the external
1117     // interfaces. `populateFreeListAndPopBlocks()` is an internal interface, so
1118     // we reset the values to avoid incorrectly updating the stats.
1119     Region->FreeListInfo.PushedBlocks -= NumberOfBlocks;
1120 
1121     const uptr AllocatedUser = Size * NumberOfBlocks;
1122     C->getStats().add(StatFree, AllocatedUser);
1123     Region->MemMapInfo.AllocatedUser += AllocatedUser;
1124 
1125     return PopCount;
1126   }
1127 
1128   void getStats(ScopedString *Str, uptr ClassId, RegionInfo *Region)
1129       REQUIRES(Region->MMLock, Region->FLLock) {
1130     if (Region->MemMapInfo.MappedUser == 0)
1131       return;
1132     const uptr BlockSize = getSizeByClassId(ClassId);
1133     const uptr InUseBlocks =
1134         Region->FreeListInfo.PoppedBlocks - Region->FreeListInfo.PushedBlocks;
1135     const uptr BytesInFreeList =
1136         Region->MemMapInfo.AllocatedUser - InUseBlocks * BlockSize;
1137     uptr RegionPushedBytesDelta = 0;
1138     if (BytesInFreeList >=
1139         Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint) {
1140       RegionPushedBytesDelta =
1141           BytesInFreeList - Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint;
1142     }
1143     const uptr TotalChunks = Region->MemMapInfo.AllocatedUser / BlockSize;
1144     Str->append(
1145         "%s %02zu (%6zu): mapped: %6zuK popped: %7zu pushed: %7zu "
1146         "inuse: %6zu total: %6zu releases: %6zu last "
1147         "released: %6zuK latest pushed bytes: %6zuK region: 0x%zx (0x%zx)\n",
1148         Region->Exhausted ? "E" : " ", ClassId, getSizeByClassId(ClassId),
1149         Region->MemMapInfo.MappedUser >> 10, Region->FreeListInfo.PoppedBlocks,
1150         Region->FreeListInfo.PushedBlocks, InUseBlocks, TotalChunks,
1151         Region->ReleaseInfo.RangesReleased,
1152         Region->ReleaseInfo.LastReleasedBytes >> 10,
1153         RegionPushedBytesDelta >> 10, Region->RegionBeg,
1154         getRegionBaseByClassId(ClassId));
1155   }
1156 
1157   void getRegionFragmentationInfo(RegionInfo *Region, uptr ClassId,
1158                                   ScopedString *Str) REQUIRES(Region->MMLock) {
1159     const uptr BlockSize = getSizeByClassId(ClassId);
1160     const uptr AllocatedUserEnd =
1161         Region->MemMapInfo.AllocatedUser + Region->RegionBeg;
1162 
1163     SinglyLinkedList<BatchGroupT> GroupsToRelease;
1164     {
1165       ScopedLock L(Region->FLLock);
1166       GroupsToRelease = Region->FreeListInfo.BlockList;
1167       Region->FreeListInfo.BlockList.clear();
1168     }
1169 
1170     FragmentationRecorder Recorder;
1171     if (!GroupsToRelease.empty()) {
1172       PageReleaseContext Context =
1173           markFreeBlocks(Region, BlockSize, AllocatedUserEnd,
1174                          getCompactPtrBaseByClassId(ClassId), GroupsToRelease);
1175       auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; };
1176       releaseFreeMemoryToOS(Context, Recorder, SkipRegion);
1177 
1178       mergeGroupsToReleaseBack(Region, GroupsToRelease);
1179     }
1180 
1181     ScopedLock L(Region->FLLock);
1182     const uptr PageSize = getPageSizeCached();
1183     const uptr TotalBlocks = Region->MemMapInfo.AllocatedUser / BlockSize;
1184     const uptr InUseBlocks =
1185         Region->FreeListInfo.PoppedBlocks - Region->FreeListInfo.PushedBlocks;
1186     const uptr AllocatedPagesCount =
1187         roundUp(Region->MemMapInfo.AllocatedUser, PageSize) / PageSize;
1188     DCHECK_GE(AllocatedPagesCount, Recorder.getReleasedPagesCount());
1189     const uptr InUsePages =
1190         AllocatedPagesCount - Recorder.getReleasedPagesCount();
1191     const uptr InUseBytes = InUsePages * PageSize;
1192 
1193     uptr Integral;
1194     uptr Fractional;
1195     computePercentage(BlockSize * InUseBlocks, InUseBytes, &Integral,
1196                       &Fractional);
1197     Str->append("  %02zu (%6zu): inuse/total blocks: %6zu/%6zu inuse/total "
1198                 "pages: %6zu/%6zu inuse bytes: %6zuK util: %3zu.%02zu%%\n",
1199                 ClassId, BlockSize, InUseBlocks, TotalBlocks, InUsePages,
1200                 AllocatedPagesCount, InUseBytes >> 10, Integral, Fractional);
1201   }
1202 
1203   void getMemoryGroupFragmentationInfoInRegion(RegionInfo *Region, uptr ClassId,
1204                                                ScopedString *Str)
1205       REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) {
1206     const uptr BlockSize = getSizeByClassId(ClassId);
1207     const uptr AllocatedUserEnd =
1208         Region->MemMapInfo.AllocatedUser + Region->RegionBeg;
1209 
1210     SinglyLinkedList<BatchGroupT> GroupsToRelease;
1211     {
1212       ScopedLock L(Region->FLLock);
1213       GroupsToRelease = Region->FreeListInfo.BlockList;
1214       Region->FreeListInfo.BlockList.clear();
1215     }
1216 
1217     constexpr uptr GroupSize = (1UL << GroupSizeLog);
1218     constexpr uptr MaxNumGroups = RegionSize / GroupSize;
1219 
1220     MemoryGroupFragmentationRecorder<GroupSize, MaxNumGroups> Recorder;
1221     if (!GroupsToRelease.empty()) {
1222       PageReleaseContext Context =
1223           markFreeBlocks(Region, BlockSize, AllocatedUserEnd,
1224                          getCompactPtrBaseByClassId(ClassId), GroupsToRelease);
1225       auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; };
1226       releaseFreeMemoryToOS(Context, Recorder, SkipRegion);
1227 
1228       mergeGroupsToReleaseBack(Region, GroupsToRelease);
1229     }
1230 
1231     Str->append("MemoryGroupFragmentationInfo in Region %zu (%zu)\n", ClassId,
1232                 BlockSize);
1233 
1234     const uptr MaxNumGroupsInUse =
1235         roundUp(Region->MemMapInfo.AllocatedUser, GroupSize) / GroupSize;
1236     for (uptr I = 0; I < MaxNumGroupsInUse; ++I) {
1237       uptr Integral;
1238       uptr Fractional;
1239       computePercentage(Recorder.NumPagesInOneGroup -
1240                             Recorder.getNumFreePages(I),
1241                         Recorder.NumPagesInOneGroup, &Integral, &Fractional);
1242       Str->append("MemoryGroup #%zu (0x%zx): util: %3zu.%02zu%%\n", I,
1243                   Region->RegionBeg + I * GroupSize, Integral, Fractional);
1244     }
1245   }
1246 
1247   NOINLINE uptr releaseToOSMaybe(RegionInfo *Region, uptr ClassId,
1248                                  ReleaseToOS ReleaseType = ReleaseToOS::Normal)
1249       REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) {
1250     const uptr BlockSize = getSizeByClassId(ClassId);
1251     uptr BytesInFreeList;
1252     const uptr AllocatedUserEnd =
1253         Region->MemMapInfo.AllocatedUser + Region->RegionBeg;
1254     uptr RegionPushedBytesDelta = 0;
1255     SinglyLinkedList<BatchGroupT> GroupsToRelease;
1256 
1257     {
1258       ScopedLock L(Region->FLLock);
1259 
1260       BytesInFreeList = Region->MemMapInfo.AllocatedUser -
1261                         (Region->FreeListInfo.PoppedBlocks -
1262                          Region->FreeListInfo.PushedBlocks) *
1263                             BlockSize;
1264       if (UNLIKELY(BytesInFreeList == 0))
1265         return 0;
1266 
1267       // ==================================================================== //
1268       // 1. Check if we have enough free blocks and if it's worth doing a page
1269       //    release.
1270       // ==================================================================== //
1271       if (ReleaseType != ReleaseToOS::ForceAll &&
1272           !hasChanceToReleasePages(Region, BlockSize, BytesInFreeList,
1273                                    ReleaseType)) {
1274         return 0;
1275       }
1276 
1277       // Given that we will unlock the freelist for block operations, cache the
1278       // value here so that when we adapt the `TryReleaseThreshold` later, we
1279       // are using the right metric.
1280       RegionPushedBytesDelta =
1281           BytesInFreeList - Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint;
1282 
1283       // ==================================================================== //
1284       // 2. Determine which groups can release the pages. Use a heuristic to
1285       //    gather groups that are candidates for doing a release.
1286       // ==================================================================== //
1287       if (ReleaseType == ReleaseToOS::ForceAll) {
1288         GroupsToRelease = Region->FreeListInfo.BlockList;
1289         Region->FreeListInfo.BlockList.clear();
1290       } else {
1291         GroupsToRelease =
1292             collectGroupsToRelease(Region, BlockSize, AllocatedUserEnd,
1293                                    getCompactPtrBaseByClassId(ClassId));
1294       }
1295       if (GroupsToRelease.empty())
1296         return 0;
1297     }
1298 
1299     // Note that we have extracted the `GroupsToRelease` from the region
1300     // freelist. It's safe to let pushBlocks()/popBlocks() access the remaining
1301     // region freelist. In steps 3 and 4, we will temporarily release the FLLock
1302     // and lock it again before step 5.
1303 
1304     // ==================================================================== //
1305     // 3. Mark the free blocks in `GroupsToRelease` in the `PageReleaseContext`.
1306     //    Then we can tell which pages are in-use by querying
1307     //    `PageReleaseContext`.
1308     // ==================================================================== //
1309     PageReleaseContext Context =
1310         markFreeBlocks(Region, BlockSize, AllocatedUserEnd,
1311                        getCompactPtrBaseByClassId(ClassId), GroupsToRelease);
1312     if (UNLIKELY(!Context.hasBlockMarked())) {
1313       mergeGroupsToReleaseBack(Region, GroupsToRelease);
1314       return 0;
1315     }
1316 
1317     // ==================================================================== //
1318     // 4. Release the unused physical pages back to the OS.
1319     // ==================================================================== //
1320     RegionReleaseRecorder<MemMapT> Recorder(&Region->MemMapInfo.MemMap,
1321                                             Region->RegionBeg,
1322                                             Context.getReleaseOffset());
1323     auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; };
1324     releaseFreeMemoryToOS(Context, Recorder, SkipRegion);
1325     if (Recorder.getReleasedRangesCount() > 0) {
1326       // This is the case where we didn't hit the release threshold but a
1327       // certain period of time has passed. Thus we try to release some pages
1328       // and, if that does release some additional pages, it's a hint that we
1329       // are able to lower the threshold. Currently, this case happens when the
1330       // `RegionPushedBytesDelta` is over half of the `TryReleaseThreshold`. As
1331       // a result, we shrink the threshold to half accordingly.
1332       // TODO(chiahungduan): Apply the same adjustment strategy to small blocks.
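           // For example, a `TryReleaseThreshold` of 128 KiB would be halved to
           // 64 KiB here, but never below getMinReleaseAttemptSize(BlockSize).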
1333       if (!isSmallBlock(BlockSize)) {
1334         if (RegionPushedBytesDelta < Region->ReleaseInfo.TryReleaseThreshold &&
1335             Recorder.getReleasedBytes() >
1336                 Region->ReleaseInfo.LastReleasedBytes +
1337                     getMinReleaseAttemptSize(BlockSize)) {
1338           Region->ReleaseInfo.TryReleaseThreshold =
1339               Max(Region->ReleaseInfo.TryReleaseThreshold / 2,
1340                   getMinReleaseAttemptSize(BlockSize));
1341         }
1342       }
1343 
1344       Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList;
1345       Region->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount();
1346       Region->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes();
1347     }
1348     Region->ReleaseInfo.LastReleaseAtNs = getMonotonicTimeFast();
1349 
1350     if (Region->ReleaseInfo.PendingPushedBytesDelta > 0) {
1351       // Instead of increasing the threshold by the full amount of
1352       // `PendingPushedBytesDelta`, we only increase it by half so that the
1353       // threshold doesn't jump (which may lead to higher memory pressure)
1354       // because of infrequent memory usage bursts.
1355       Region->ReleaseInfo.TryReleaseThreshold +=
1356           Region->ReleaseInfo.PendingPushedBytesDelta / 2;
1357       // This is another guard against the threshold growing indefinitely.
1358       // Note that we may consider making this configurable if we have a better
1359       // way to model this.
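           // For example, with a `PendingPushedBytesDelta` of 512 KiB, the threshold
           // grows by 256 KiB and is then capped at half the group size.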
1360       Region->ReleaseInfo.TryReleaseThreshold = Min<uptr>(
1361           Region->ReleaseInfo.TryReleaseThreshold, (1UL << GroupSizeLog) / 2);
1362       Region->ReleaseInfo.PendingPushedBytesDelta = 0;
1363     }
1364 
1365     // ====================================================================== //
1366     // 5. Merge the `GroupsToRelease` back to the freelist.
1367     // ====================================================================== //
1368     mergeGroupsToReleaseBack(Region, GroupsToRelease);
1369 
1370     return Recorder.getReleasedBytes();
1371   }
1372 
1373   bool hasChanceToReleasePages(RegionInfo *Region, uptr BlockSize,
1374                                uptr BytesInFreeList, ReleaseToOS ReleaseType)
1375       REQUIRES(Region->MMLock, Region->FLLock) {
1376     DCHECK_GE(Region->FreeListInfo.PoppedBlocks,
1377               Region->FreeListInfo.PushedBlocks);
1378     // Always update `BytesInFreeListAtLastCheckpoint` with the smallest value
1379     // so that we won't underestimate the releasable pages. For example, the
1380     // following is the region usage,
1381     //
1382     //  BytesInFreeListAtLastCheckpoint   AllocatedUser
1383     //                v                         v
1384     //  |--------------------------------------->
1385     //         ^                   ^
1386     //  BytesInFreeList     ReleaseThreshold
1387     //
1388     // In general, if we have collected enough bytes and the amount of free
1389     // bytes meets the ReleaseThreshold, we will try to do page release. If we
1390     // don't update `BytesInFreeListAtLastCheckpoint` when the current
1391     // `BytesInFreeList` is smaller, we may wait longer for enough freed blocks
1392     // because we would be missing the
1393     // (BytesInFreeListAtLastCheckpoint - BytesInFreeList) bytes.
1394     if (BytesInFreeList <=
1395         Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint) {
1396       Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList;
1397     }
1398 
1399     const uptr RegionPushedBytesDelta =
1400         BytesInFreeList - Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint;
1401 
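         // In short, a normal release proceeds only if (1) at least half of
         // `TryReleaseThreshold` bytes were pushed since the last checkpoint, (2)
         // the release interval is non-negative, and (3) enough time has passed
         // since the last release: one interval when over the threshold, two
         // intervals when between half and the full threshold.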
1402     if (ReleaseType == ReleaseToOS::Normal) {
1403       if (RegionPushedBytesDelta < Region->ReleaseInfo.TryReleaseThreshold / 2)
1404         return false;
1405 
1406       const s64 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs);
1407       if (IntervalMs < 0)
1408         return false;
1409 
1410       const u64 IntervalNs = static_cast<u64>(IntervalMs) * 1000000;
1411       const u64 CurTimeNs = getMonotonicTimeFast();
1412       const u64 DiffSinceLastReleaseNs =
1413           CurTimeNs - Region->ReleaseInfo.LastReleaseAtNs;
1414 
1415       // At this point, `RegionPushedBytesDelta` is more than half of
1416       // `TryReleaseThreshold`. If the last release happened more than 2 release
1417       // intervals ago, we will still try to see if there's any chance to
1418       // release some memory even if it doesn't exceed the threshold.
1419       if (RegionPushedBytesDelta < Region->ReleaseInfo.TryReleaseThreshold) {
1420         // We want the threshold to respond more quickly to varying memory
1421         // usage patterns. According to data collected during experiments (which
1422         // were done with 1, 2, 4, 8 intervals), `2` strikes a better balance
1423         // between memory usage and the number of page release attempts.
1424         if (DiffSinceLastReleaseNs < 2 * IntervalNs)
1425           return false;
1426       } else if (DiffSinceLastReleaseNs < IntervalNs) {
1427         // In this case, we are over the threshold but we just did some page
1428         // release in the same release interval. This is a hint that we may want
1429         // a higher threshold so that we can release more memory at once.
1430         // `TryReleaseThreshold` will be adjusted according to how many bytes
1431         // are not released, i.e., the `PendingPushedBytesDelta` here.
1432         // TODO(chiahungduan): Apply the same adjustment strategy to small
1433         // blocks.
1434         if (!isSmallBlock(BlockSize))
1435           Region->ReleaseInfo.PendingPushedBytesDelta = RegionPushedBytesDelta;
1436 
1437         // Memory was returned recently.
1438         return false;
1439       }
1440     } // if (ReleaseType == ReleaseToOS::Normal)
1441 
1442     return true;
1443   }
1444 
1445   SinglyLinkedList<BatchGroupT>
1446   collectGroupsToRelease(RegionInfo *Region, const uptr BlockSize,
1447                          const uptr AllocatedUserEnd, const uptr CompactPtrBase)
1448       REQUIRES(Region->MMLock, Region->FLLock) {
1449     const uptr GroupSize = (1UL << GroupSizeLog);
1450     const uptr PageSize = getPageSizeCached();
1451     SinglyLinkedList<BatchGroupT> GroupsToRelease;
1452 
1453     // We are examining each group and will take the minimum distance to the
1454     // release threshold as the next `TryReleaseThreshold`. Note that if the
1455     // size of free blocks has reached the release threshold, the distance to
1456     // the next release will be PageSize * SmallerBlockReleasePageDelta. See the
1457     // comment on `SmallerBlockReleasePageDelta` for more details.
1458     uptr MinDistToThreshold = GroupSize;
1459 
1460     for (BatchGroupT *BG = Region->FreeListInfo.BlockList.front(),
1461                      *Prev = nullptr;
1462          BG != nullptr;) {
1463       // Group boundary is always GroupSize-aligned from CompactPtr base. The
1464       // layout of memory groups is like,
1465       //
1466       //     (CompactPtrBase)
1467       // #1 CompactPtrGroupBase   #2 CompactPtrGroupBase            ...
1468       //           |                       |                       |
1469       //           v                       v                       v
1470       //           +-----------------------+-----------------------+
1471       //            \                     / \                     /
1472       //             ---   GroupSize   ---   ---   GroupSize   ---
1473       //
1474       // After decompacting the CompactPtrGroupBase, we expect the alignment
1475       // property to hold as well.
1476       const uptr BatchGroupBase =
1477           decompactGroupBase(CompactPtrBase, BG->CompactPtrGroupBase);
1478       DCHECK_LE(Region->RegionBeg, BatchGroupBase);
1479       DCHECK_GE(AllocatedUserEnd, BatchGroupBase);
1480       DCHECK_EQ((Region->RegionBeg - BatchGroupBase) % GroupSize, 0U);
1481       // TransferBatches are pushed to the front of BG.Batches, so only the
1482       // first one may be partially filled.
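           // For example, 3 batches with `MaxCachedPerBatch` = 14 and a front batch
           // holding 5 blocks give (3 - 1) * 14 + 5 = 33 free blocks in this group.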
1483       const uptr NumBlocks = (BG->Batches.size() - 1) * BG->MaxCachedPerBatch +
1484                              BG->Batches.front()->getCount();
1485       const uptr BytesInBG = NumBlocks * BlockSize;
1486 
1487       if (BytesInBG <= BG->BytesInBGAtLastCheckpoint) {
1488         BG->BytesInBGAtLastCheckpoint = BytesInBG;
1489         Prev = BG;
1490         BG = BG->Next;
1491         continue;
1492       }
1493 
1494       const uptr PushedBytesDelta = BytesInBG - BG->BytesInBGAtLastCheckpoint;
1495       if (PushedBytesDelta < getMinReleaseAttemptSize(BlockSize)) {
1496         Prev = BG;
1497         BG = BG->Next;
1498         continue;
1499       }
1500 
1501       // Given the randomness property, we try to release the pages only if the
1502       // bytes used by free blocks exceed a certain proportion of the group
1503       // size. Note that this heuristic only applies when all the spaces in a
1504       // BatchGroup are allocated.
1505       if (isSmallBlock(BlockSize)) {
1506         const uptr BatchGroupEnd = BatchGroupBase + GroupSize;
1507         const uptr AllocatedGroupSize = AllocatedUserEnd >= BatchGroupEnd
1508                                             ? GroupSize
1509                                             : AllocatedUserEnd - BatchGroupBase;
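             // The density threshold scales with the block size. For example, with
             // BlockSize = 32 it is (100 - 1 - 32 / 16)% = 97% of AllocatedGroupSize,
             // while with BlockSize = 512 it drops to 67%.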
1510         const uptr ReleaseThreshold =
1511             (AllocatedGroupSize * (100 - 1U - BlockSize / 16U)) / 100U;
1512         const bool HighDensity = BytesInBG >= ReleaseThreshold;
1513         const bool MayHaveReleasedAll = NumBlocks >= (GroupSize / BlockSize);
1514         // If all blocks in the group are released, we will do range marking
1515         // which is fast. Otherwise, we will wait until we have accumulated
1516         // a certain amount of free memory.
1517         const bool ReachReleaseDelta =
1518             MayHaveReleasedAll
1519                 ? true
1520                 : PushedBytesDelta >= PageSize * SmallerBlockReleasePageDelta;
1521 
1522         if (!HighDensity) {
1523           DCHECK_LE(BytesInBG, ReleaseThreshold);
1524           // The following is the usage of a memory group,
1525           //
1526           //     BytesInBG             ReleaseThreshold
1527           //  /             \                 v
1528           //  +---+---------------------------+-----+
1529           //  |   |         |                 |     |
1530           //  +---+---------------------------+-----+
1531           //       \        /                       ^
1532           //    PushedBytesDelta                 GroupEnd
1533           MinDistToThreshold =
1534               Min(MinDistToThreshold,
1535                   ReleaseThreshold - BytesInBG + PushedBytesDelta);
1536         } else {
1537           // If it reaches high density in this round, the next release attempt
1538           // will be based on SmallerBlockReleasePageDelta.
1539           MinDistToThreshold =
1540               Min(MinDistToThreshold, PageSize * SmallerBlockReleasePageDelta);
1541         }
1542 
1543         if (!HighDensity || !ReachReleaseDelta) {
1544           Prev = BG;
1545           BG = BG->Next;
1546           continue;
1547         }
1548       }
1549 
1550       // If `BG` is the first BatchGroupT in the list, we only need to advance
1551       // `BG` and call FreeListInfo.BlockList::pop_front(). No update is needed
1552       // for `Prev`.
1553       //
1554       //         (BG)   (BG->Next)
1555       // Prev     Cur      BG
1556       //   |       |       |
1557       //   v       v       v
1558       //  nil     +--+    +--+
1559       //          |X | -> |  | -> ...
1560       //          +--+    +--+
1561       //
1562       // Otherwise, `Prev` will be used to extract the `Cur` from the
1563       // `FreeListInfo.BlockList`.
1564       //
1565       //         (BG)   (BG->Next)
1566       // Prev     Cur      BG
1567       //   |       |       |
1568       //   v       v       v
1569       //  +--+    +--+    +--+
1570       //  |  | -> |X | -> |  | -> ...
1571       //  +--+    +--+    +--+
1572       //
1573       // After FreeListInfo.BlockList::extract(),
1574       //
1575       // Prev     Cur       BG
1576       //   |       |        |
1577       //   v       v        v
1578       //  +--+    +--+     +--+
1579       //  |  |-+  |X |  +->|  | -> ...
1580       //  +--+ |  +--+  |  +--+
1581       //       +--------+
1582       //
1583       // Note that we need to advance before pushing this BatchGroup to
1584       // GroupsToRelease because it's a destructive operation.
1585 
1586       BatchGroupT *Cur = BG;
1587       BG = BG->Next;
1588 
1589       // Ideally, we would update this only after a successful release.
1590       // However, for smaller blocks, marking each block is a costly operation.
1591       // Therefore, we update it earlier.
1592       // TODO: Consider updating this after releasing pages if `ReleaseRecorder`
1593       // can tell the released bytes in each group.
1594       Cur->BytesInBGAtLastCheckpoint = BytesInBG;
1595 
1596       if (Prev != nullptr)
1597         Region->FreeListInfo.BlockList.extract(Prev, Cur);
1598       else
1599         Region->FreeListInfo.BlockList.pop_front();
1600       GroupsToRelease.push_back(Cur);
1601     }
1602 
1603     // Only small blocks have the adaptive `TryReleaseThreshold`.
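         // For example, if the group closest to its release threshold still needs
         // roughly 24 KiB of additional freed bytes (measured from its last
         // checkpoint), the next `TryReleaseThreshold` becomes 24 KiB.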
1604     if (isSmallBlock(BlockSize)) {
1605       // If MinDistToThreshold was not updated, that means each memory group may
1606       // have pushed less than a page size worth of bytes. In that case, just
1607       // set it back to the default.
1608       if (MinDistToThreshold == GroupSize)
1609         MinDistToThreshold = PageSize * SmallerBlockReleasePageDelta;
1610       Region->ReleaseInfo.TryReleaseThreshold = MinDistToThreshold;
1611     }
1612 
1613     return GroupsToRelease;
1614   }
1615 
1616   PageReleaseContext
1617   markFreeBlocks(RegionInfo *Region, const uptr BlockSize,
1618                  const uptr AllocatedUserEnd, const uptr CompactPtrBase,
1619                  SinglyLinkedList<BatchGroupT> &GroupsToRelease)
1620       REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) {
1621     const uptr GroupSize = (1UL << GroupSizeLog);
1622     auto DecompactPtr = [CompactPtrBase](CompactPtrT CompactPtr) {
1623       return decompactPtrInternal(CompactPtrBase, CompactPtr);
1624     };
1625 
1626     const uptr ReleaseBase = decompactGroupBase(
1627         CompactPtrBase, GroupsToRelease.front()->CompactPtrGroupBase);
1628     const uptr LastGroupEnd =
1629         Min(decompactGroupBase(CompactPtrBase,
1630                                GroupsToRelease.back()->CompactPtrGroupBase) +
1631                 GroupSize,
1632             AllocatedUserEnd);
1633     // The last block may straddle the group boundary. Round up to BlockSize to
1634     // get the exact range.
1635     const uptr ReleaseEnd =
1636         roundUpSlow(LastGroupEnd - Region->RegionBeg, BlockSize) +
1637         Region->RegionBeg;
1638     const uptr ReleaseRangeSize = ReleaseEnd - ReleaseBase;
1639     const uptr ReleaseOffset = ReleaseBase - Region->RegionBeg;
1640 
1641     PageReleaseContext Context(BlockSize, /*NumberOfRegions=*/1U,
1642                                ReleaseRangeSize, ReleaseOffset);
1643     // In the rare case that the PageMap allocation fails, we are not able to
1644     // do the page release.
1645     if (UNLIKELY(!Context.ensurePageMapAllocated()))
1646       return Context;
1647 
1648     for (BatchGroupT &BG : GroupsToRelease) {
1649       const uptr BatchGroupBase =
1650           decompactGroupBase(CompactPtrBase, BG.CompactPtrGroupBase);
1651       const uptr BatchGroupEnd = BatchGroupBase + GroupSize;
1652       const uptr AllocatedGroupSize = AllocatedUserEnd >= BatchGroupEnd
1653                                           ? GroupSize
1654                                           : AllocatedUserEnd - BatchGroupBase;
1655       const uptr BatchGroupUsedEnd = BatchGroupBase + AllocatedGroupSize;
1656       const bool MayContainLastBlockInRegion =
1657           BatchGroupUsedEnd == AllocatedUserEnd;
1658       const bool BlockAlignedWithUsedEnd =
1659           (BatchGroupUsedEnd - Region->RegionBeg) % BlockSize == 0;
1660 
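           // MaxContainedBlocks counts the whole blocks that fit in the allocated
           // part of this group, plus one when a block straddles the group's used
           // end (i.e., the used end, measured from RegionBeg, is not block-aligned).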
1661       uptr MaxContainedBlocks = AllocatedGroupSize / BlockSize;
1662       if (!BlockAlignedWithUsedEnd)
1663         ++MaxContainedBlocks;
1664 
1665       const uptr NumBlocks = (BG.Batches.size() - 1) * BG.MaxCachedPerBatch +
1666                              BG.Batches.front()->getCount();
1667 
1668       if (NumBlocks == MaxContainedBlocks) {
1669         for (const auto &It : BG.Batches) {
1670           if (&It != BG.Batches.front())
1671             DCHECK_EQ(It.getCount(), BG.MaxCachedPerBatch);
1672           for (u16 I = 0; I < It.getCount(); ++I)
1673             DCHECK_EQ(compactPtrGroup(It.get(I)), BG.CompactPtrGroupBase);
1674         }
1675 
1676         Context.markRangeAsAllCounted(BatchGroupBase, BatchGroupUsedEnd,
1677                                       Region->RegionBeg, /*RegionIndex=*/0,
1678                                       Region->MemMapInfo.AllocatedUser);
1679       } else {
1680         DCHECK_LT(NumBlocks, MaxContainedBlocks);
1681         // Note that we don't always visit all blocks in each BatchGroup, so we
1682         // may miss the chance of releasing certain pages that cross
1683         // BatchGroups.
1684         Context.markFreeBlocksInRegion(
1685             BG.Batches, DecompactPtr, Region->RegionBeg, /*RegionIndex=*/0,
1686             Region->MemMapInfo.AllocatedUser, MayContainLastBlockInRegion);
1687       }
1688     }
1689 
1690     DCHECK(Context.hasBlockMarked());
1691 
1692     return Context;
1693   }
1694 
1695   void mergeGroupsToReleaseBack(RegionInfo *Region,
1696                                 SinglyLinkedList<BatchGroupT> &GroupsToRelease)
1697       REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) {
1698     ScopedLock L(Region->FLLock);
1699 
1700     // After merging two freelists, we may have redundant `BatchGroup`s that
1701     // need to be recycled. The number of unused `BatchGroup`s is expected to be
1702     // small. Pick a constant inferred from real programs.
1703     constexpr uptr MaxUnusedSize = 8;
1704     CompactPtrT Blocks[MaxUnusedSize];
1705     u32 Idx = 0;
1706     RegionInfo *BatchClassRegion = getRegionInfo(SizeClassMap::BatchClassId);
1707     // We can't call pushBatchClassBlocks() to recycle the unused `BatchGroup`s
1708     // when we are manipulating the freelist of `BatchClassRegion`. Instead, we
1709     // should just push them back to the freelist when we merge two
1710     // `BatchGroup`s. This logic hasn't been implemented because we don't
1711     // support releasing pages in `BatchClassRegion` yet.
1712     DCHECK_NE(BatchClassRegion, Region);
1713 
1714     // Merge GroupsToRelease back to the Region::FreeListInfo.BlockList. Note
1715     // that both `Region->FreeListInfo.BlockList` and `GroupsToRelease` are
1716     // sorted.
1717     for (BatchGroupT *BG = Region->FreeListInfo.BlockList.front(),
1718                      *Prev = nullptr;
1719          ;) {
1720       if (BG == nullptr || GroupsToRelease.empty()) {
1721         if (!GroupsToRelease.empty())
1722           Region->FreeListInfo.BlockList.append_back(&GroupsToRelease);
1723         break;
1724       }
1725 
1726       DCHECK(!BG->Batches.empty());
1727 
1728       if (BG->CompactPtrGroupBase <
1729           GroupsToRelease.front()->CompactPtrGroupBase) {
1730         Prev = BG;
1731         BG = BG->Next;
1732         continue;
1733       }
1734 
1735       BatchGroupT *Cur = GroupsToRelease.front();
1736       TransferBatchT *UnusedTransferBatch = nullptr;
1737       GroupsToRelease.pop_front();
1738 
1739       if (BG->CompactPtrGroupBase == Cur->CompactPtrGroupBase) {
1740         // We have updated `BatchGroup::BytesInBGAtLastCheckpoint` while
1741         // collecting the `GroupsToRelease`.
1742         BG->BytesInBGAtLastCheckpoint = Cur->BytesInBGAtLastCheckpoint;
1743         const uptr MaxCachedPerBatch = BG->MaxCachedPerBatch;
1744 
1745         // Note that the first TransferBatch in each `Batches` list may not be
1746         // full, and only the first TransferBatch is allowed to be non-full.
1747         // Thus we have to merge them before appending one list to the other.
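             // For example, if `MaxCachedPerBatch` is 14, BG's front batch holds 9
             // blocks and Cur's non-full front batch holds 8, then
             // min(14 - 9, 8) = 5 blocks are moved into BG's front batch; the
             // remaining 3 stay in `NonFullBatch`, which is then pushed to the front.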
1748         if (Cur->Batches.front()->getCount() == MaxCachedPerBatch) {
1749           BG->Batches.append_back(&Cur->Batches);
1750         } else {
1751           TransferBatchT *NonFullBatch = Cur->Batches.front();
1752           Cur->Batches.pop_front();
1753           const u16 NonFullBatchCount = NonFullBatch->getCount();
1754           // The remaining Batches in `Cur` are full.
1755           BG->Batches.append_back(&Cur->Batches);
1756 
1757           if (BG->Batches.front()->getCount() == MaxCachedPerBatch) {
1758             // Only 1 non-full TransferBatch, push it to the front.
1759             BG->Batches.push_front(NonFullBatch);
1760           } else {
1761             const u16 NumBlocksToMove = static_cast<u16>(
1762                 Min(static_cast<u16>(MaxCachedPerBatch -
1763                                      BG->Batches.front()->getCount()),
1764                     NonFullBatchCount));
1765             BG->Batches.front()->appendFromTransferBatch(NonFullBatch,
1766                                                          NumBlocksToMove);
1767             if (NonFullBatch->isEmpty())
1768               UnusedTransferBatch = NonFullBatch;
1769             else
1770               BG->Batches.push_front(NonFullBatch);
1771           }
1772         }
1773 
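             // Recycling `Cur` itself takes one slot in `Blocks`; if a TransferBatch
             // was emptied above, it takes a second one. Flush the local buffer to
             // the BatchClass freelist whenever it can't hold the new entries.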
1774         const u32 NeededSlots = UnusedTransferBatch == nullptr ? 1U : 2U;
1775         if (UNLIKELY(Idx + NeededSlots > MaxUnusedSize)) {
1776           ScopedLock L(BatchClassRegion->FLLock);
1777           pushBatchClassBlocks(BatchClassRegion, Blocks, Idx);
1778           if (conditionVariableEnabled())
1779             BatchClassRegion->FLLockCV.notifyAll(BatchClassRegion->FLLock);
1780           Idx = 0;
1781         }
1782         Blocks[Idx++] =
1783             compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(Cur));
1784         if (UnusedTransferBatch) {
1785           Blocks[Idx++] =
1786               compactPtr(SizeClassMap::BatchClassId,
1787                          reinterpret_cast<uptr>(UnusedTransferBatch));
1788         }
1789         Prev = BG;
1790         BG = BG->Next;
1791         continue;
1792       }
1793 
1794       // At this point, `BG` is the first BatchGroup with a CompactPtrGroupBase
1795       // larger than that of the first element in `GroupsToRelease`. We need to
1796       // insert `GroupsToRelease::front()` (which is `Cur` below) before `BG`.
1797       //
1798       //   1. If `Prev` is nullptr, we simply push `Cur` to the front of
1799       //      FreeListInfo.BlockList.
1800       //   2. Otherwise, use `insert()` which inserts an element next to `Prev`.
1801       //
1802       // Afterwards, we don't need to advance `BG` because the order between
1803       // `BG` and the new `GroupsToRelease::front()` hasn't been checked.
1804       if (Prev == nullptr)
1805         Region->FreeListInfo.BlockList.push_front(Cur);
1806       else
1807         Region->FreeListInfo.BlockList.insert(Prev, Cur);
1808       DCHECK_EQ(Cur->Next, BG);
1809       Prev = Cur;
1810     }
1811 
1812     if (Idx != 0) {
1813       ScopedLock L(BatchClassRegion->FLLock);
1814       pushBatchClassBlocks(BatchClassRegion, Blocks, Idx);
1815       if (conditionVariableEnabled())
1816         BatchClassRegion->FLLockCV.notifyAll(BatchClassRegion->FLLock);
1817     }
1818 
1819     if (SCUDO_DEBUG) {
1820       BatchGroupT *Prev = Region->FreeListInfo.BlockList.front();
1821       for (BatchGroupT *Cur = Prev->Next; Cur != nullptr;
1822            Prev = Cur, Cur = Cur->Next) {
1823         CHECK_LT(Prev->CompactPtrGroupBase, Cur->CompactPtrGroupBase);
1824       }
1825     }
1826 
1827     if (conditionVariableEnabled())
1828       Region->FLLockCV.notifyAll(Region->FLLock);
1829   }
1830 
1831   // The minimum amount of pushed blocks, measured in pages, at which we will
1832   // try to release the pages in that size class.
1833   uptr SmallerBlockReleasePageDelta = 0;
1834   atomic_s32 ReleaseToOsIntervalMs = {};
1835   alignas(SCUDO_CACHE_LINE_SIZE) RegionInfo RegionInfoArray[NumClasses];
1836 };
1837 
1838 } // namespace scudo
1839 
1840 #endif // SCUDO_PRIMARY64_H_
1841