1 //===-- primary64.h ---------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef SCUDO_PRIMARY64_H_ 10 #define SCUDO_PRIMARY64_H_ 11 12 #include "allocator_common.h" 13 #include "bytemap.h" 14 #include "common.h" 15 #include "condition_variable.h" 16 #include "list.h" 17 #include "local_cache.h" 18 #include "mem_map.h" 19 #include "memtag.h" 20 #include "options.h" 21 #include "release.h" 22 #include "stats.h" 23 #include "string_utils.h" 24 #include "thread_annotations.h" 25 26 namespace scudo { 27 28 // SizeClassAllocator64 is an allocator tuned for 64-bit address space. 29 // 30 // It starts by reserving NumClasses * 2^RegionSizeLog bytes, equally divided in 31 // Regions, specific to each size class. Note that the base of that mapping is 32 // random (based to the platform specific map() capabilities). If 33 // PrimaryEnableRandomOffset is set, each Region actually starts at a random 34 // offset from its base. 35 // 36 // Regions are mapped incrementally on demand to fulfill allocation requests, 37 // those mappings being split into equally sized Blocks based on the size class 38 // they belong to. The Blocks created are shuffled to prevent predictable 39 // address patterns (the predictability increases with the size of the Blocks). 40 // 41 // The 1st Region (for size class 0) holds the TransferBatches. This is a 42 // structure used to transfer arrays of available pointers from the class size 43 // freelist to the thread specific freelist, and back. 44 // 45 // The memory used by this allocator is never unmapped, but can be partially 46 // released if the platform allows for it. 47 48 template <typename Config> class SizeClassAllocator64 { 49 public: 50 typedef typename Config::CompactPtrT CompactPtrT; 51 typedef typename Config::SizeClassMap SizeClassMap; 52 typedef typename Config::ConditionVariableT ConditionVariableT; 53 static const uptr CompactPtrScale = Config::getCompactPtrScale(); 54 static const uptr RegionSizeLog = Config::getRegionSizeLog(); 55 static const uptr GroupSizeLog = Config::getGroupSizeLog(); 56 static_assert(RegionSizeLog >= GroupSizeLog, 57 "Group size shouldn't be greater than the region size"); 58 static const uptr GroupScale = GroupSizeLog - CompactPtrScale; 59 typedef SizeClassAllocator64<Config> ThisT; 60 typedef SizeClassAllocatorLocalCache<ThisT> CacheT; 61 typedef TransferBatch<ThisT> TransferBatchT; 62 typedef BatchGroup<ThisT> BatchGroupT; 63 64 // BachClass is used to store internal metadata so it needs to be at least as 65 // large as the largest data structure. getSizeByClassId(uptr ClassId)66 static uptr getSizeByClassId(uptr ClassId) { 67 return (ClassId == SizeClassMap::BatchClassId) 68 ? 
roundUp(Max(sizeof(TransferBatchT), sizeof(BatchGroupT)), 69 1U << CompactPtrScale) 70 : SizeClassMap::getSizeByClassId(ClassId); 71 } 72 canAllocate(uptr Size)73 static bool canAllocate(uptr Size) { return Size <= SizeClassMap::MaxSize; } 74 conditionVariableEnabled()75 static bool conditionVariableEnabled() { 76 return Config::hasConditionVariableT(); 77 } 78 init(s32 ReleaseToOsInterval)79 void init(s32 ReleaseToOsInterval) NO_THREAD_SAFETY_ANALYSIS { 80 DCHECK(isAligned(reinterpret_cast<uptr>(this), alignof(ThisT))); 81 82 const uptr PageSize = getPageSizeCached(); 83 const uptr GroupSize = (1UL << GroupSizeLog); 84 const uptr PagesInGroup = GroupSize / PageSize; 85 const uptr MinSizeClass = getSizeByClassId(1); 86 // When trying to release pages back to memory, visiting smaller size 87 // classes is expensive. Therefore, we only try to release smaller size 88 // classes when the amount of free blocks goes over a certain threshold (See 89 // the comment in releaseToOSMaybe() for more details). For example, for 90 // size class 32, we only do the release when the size of free blocks is 91 // greater than 97% of pages in a group. However, this may introduce another 92 // issue that if the number of free blocks is bouncing between 97% ~ 100%. 93 // Which means we may try many page releases but only release very few of 94 // them (less than 3% in a group). Even though we have 95 // `&ReleaseToOsIntervalMs` which slightly reduce the frequency of these 96 // calls but it will be better to have another guard to mitigate this issue. 97 // 98 // Here we add another constraint on the minimum size requirement. The 99 // constraint is determined by the size of in-use blocks in the minimal size 100 // class. Take size class 32 as an example, 101 // 102 // +- one memory group -+ 103 // +----------------------+------+ 104 // | 97% of free blocks | | 105 // +----------------------+------+ 106 // \ / 107 // 3% in-use blocks 108 // 109 // * The release size threshold is 97%. 110 // 111 // The 3% size in a group is about 7 pages. For two consecutive 112 // releaseToOSMaybe(), we require the difference between `PushedBlocks` 113 // should be greater than 7 pages. This mitigates the page releasing 114 // thrashing which is caused by memory usage bouncing around the threshold. 115 // The smallest size class takes longest time to do the page release so we 116 // use its size of in-use blocks as a heuristic. 117 SmallerBlockReleasePageDelta = 118 PagesInGroup * (1 + MinSizeClass / 16U) / 100; 119 120 u32 Seed; 121 const u64 Time = getMonotonicTimeFast(); 122 if (!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed))) 123 Seed = static_cast<u32>(Time ^ (reinterpret_cast<uptr>(&Seed) >> 12)); 124 125 for (uptr I = 0; I < NumClasses; I++) 126 getRegionInfo(I)->RandState = getRandomU32(&Seed); 127 128 if (Config::getEnableContiguousRegions()) { 129 ReservedMemoryT ReservedMemory = {}; 130 // Reserve the space required for the Primary. 
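// A rough worked illustration of the layout reserved below (the numbers are
// only an example, not tied to any particular config): with
// RegionSizeLog == 28, every Region spans 256 MiB, the reservation covers
// NumClasses * 256 MiB, and class I is dispatched at
// PrimaryBase + (I << RegionSizeLog). The regions are shuffled afterwards so
// the class-id-to-address mapping stays unpredictable.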
131 CHECK(ReservedMemory.create(/*Addr=*/0U, RegionSize * NumClasses, 132 "scudo:primary_reserve")); 133 const uptr PrimaryBase = ReservedMemory.getBase(); 134 135 for (uptr I = 0; I < NumClasses; I++) { 136 MemMapT RegionMemMap = ReservedMemory.dispatch( 137 PrimaryBase + (I << RegionSizeLog), RegionSize); 138 RegionInfo *Region = getRegionInfo(I); 139 140 initRegion(Region, I, RegionMemMap, Config::getEnableRandomOffset()); 141 } 142 shuffle(RegionInfoArray, NumClasses, &Seed); 143 } 144 145 // The binding should be done after region shuffling so that it won't bind 146 // the FLLock from the wrong region. 147 for (uptr I = 0; I < NumClasses; I++) 148 getRegionInfo(I)->FLLockCV.bindTestOnly(getRegionInfo(I)->FLLock); 149 150 // The default value in the primary config has the higher priority. 151 if (Config::getDefaultReleaseToOsIntervalMs() != INT32_MIN) 152 ReleaseToOsInterval = Config::getDefaultReleaseToOsIntervalMs(); 153 setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval)); 154 } 155 unmapTestOnly()156 void unmapTestOnly() { 157 for (uptr I = 0; I < NumClasses; I++) { 158 RegionInfo *Region = getRegionInfo(I); 159 { 160 ScopedLock ML(Region->MMLock); 161 MemMapT MemMap = Region->MemMapInfo.MemMap; 162 if (MemMap.isAllocated()) 163 MemMap.unmap(); 164 } 165 *Region = {}; 166 } 167 } 168 169 // When all blocks are freed, it has to be the same size as `AllocatedUser`. verifyAllBlocksAreReleasedTestOnly()170 void verifyAllBlocksAreReleasedTestOnly() { 171 // `BatchGroup` and `TransferBatch` also use the blocks from BatchClass. 172 uptr BatchClassUsedInFreeLists = 0; 173 for (uptr I = 0; I < NumClasses; I++) { 174 // We have to count BatchClassUsedInFreeLists in other regions first. 175 if (I == SizeClassMap::BatchClassId) 176 continue; 177 RegionInfo *Region = getRegionInfo(I); 178 ScopedLock ML(Region->MMLock); 179 ScopedLock FL(Region->FLLock); 180 const uptr BlockSize = getSizeByClassId(I); 181 uptr TotalBlocks = 0; 182 for (BatchGroupT &BG : Region->FreeListInfo.BlockList) { 183 // `BG::Batches` are `TransferBatches`. +1 for `BatchGroup`. 184 BatchClassUsedInFreeLists += BG.Batches.size() + 1; 185 for (const auto &It : BG.Batches) 186 TotalBlocks += It.getCount(); 187 } 188 189 DCHECK_EQ(TotalBlocks, Region->MemMapInfo.AllocatedUser / BlockSize); 190 DCHECK_EQ(Region->FreeListInfo.PushedBlocks, 191 Region->FreeListInfo.PoppedBlocks); 192 } 193 194 RegionInfo *Region = getRegionInfo(SizeClassMap::BatchClassId); 195 ScopedLock ML(Region->MMLock); 196 ScopedLock FL(Region->FLLock); 197 const uptr BlockSize = getSizeByClassId(SizeClassMap::BatchClassId); 198 uptr TotalBlocks = 0; 199 for (BatchGroupT &BG : Region->FreeListInfo.BlockList) { 200 if (LIKELY(!BG.Batches.empty())) { 201 for (const auto &It : BG.Batches) 202 TotalBlocks += It.getCount(); 203 } else { 204 // `BatchGroup` with empty freelist doesn't have `TransferBatch` record 205 // itself. 
206 ++TotalBlocks; 207 } 208 } 209 DCHECK_EQ(TotalBlocks + BatchClassUsedInFreeLists, 210 Region->MemMapInfo.AllocatedUser / BlockSize); 211 DCHECK_GE(Region->FreeListInfo.PoppedBlocks, 212 Region->FreeListInfo.PushedBlocks); 213 const uptr BlocksInUse = 214 Region->FreeListInfo.PoppedBlocks - Region->FreeListInfo.PushedBlocks; 215 DCHECK_EQ(BlocksInUse, BatchClassUsedInFreeLists); 216 } 217 popBlocks(CacheT * C,uptr ClassId,CompactPtrT * ToArray,const u16 MaxBlockCount)218 u16 popBlocks(CacheT *C, uptr ClassId, CompactPtrT *ToArray, 219 const u16 MaxBlockCount) { 220 DCHECK_LT(ClassId, NumClasses); 221 RegionInfo *Region = getRegionInfo(ClassId); 222 u16 PopCount = 0; 223 224 { 225 ScopedLock L(Region->FLLock); 226 PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount); 227 if (PopCount != 0U) 228 return PopCount; 229 } 230 231 bool ReportRegionExhausted = false; 232 233 if (conditionVariableEnabled()) { 234 PopCount = popBlocksWithCV(C, ClassId, Region, ToArray, MaxBlockCount, 235 ReportRegionExhausted); 236 } else { 237 while (true) { 238 // When two threads compete for `Region->MMLock`, we only want one of 239 // them to call populateFreeListAndPopBlocks(). To avoid both of them 240 // doing that, always check the freelist before mapping new pages. 241 ScopedLock ML(Region->MMLock); 242 { 243 ScopedLock FL(Region->FLLock); 244 PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount); 245 if (PopCount != 0U) 246 return PopCount; 247 } 248 249 const bool RegionIsExhausted = Region->Exhausted; 250 if (!RegionIsExhausted) { 251 PopCount = populateFreeListAndPopBlocks(C, ClassId, Region, ToArray, 252 MaxBlockCount); 253 } 254 ReportRegionExhausted = !RegionIsExhausted && Region->Exhausted; 255 break; 256 } 257 } 258 259 if (UNLIKELY(ReportRegionExhausted)) { 260 Printf("Can't populate more pages for size class %zu.\n", 261 getSizeByClassId(ClassId)); 262 263 // Theoretically, BatchClass shouldn't be used up. Abort immediately when 264 // it happens. 265 if (ClassId == SizeClassMap::BatchClassId) 266 reportOutOfBatchClass(); 267 } 268 269 return PopCount; 270 } 271 272 // Push the array of free blocks to the designated batch group. pushBlocks(CacheT * C,uptr ClassId,CompactPtrT * Array,u32 Size)273 void pushBlocks(CacheT *C, uptr ClassId, CompactPtrT *Array, u32 Size) { 274 DCHECK_LT(ClassId, NumClasses); 275 DCHECK_GT(Size, 0); 276 277 RegionInfo *Region = getRegionInfo(ClassId); 278 if (ClassId == SizeClassMap::BatchClassId) { 279 ScopedLock L(Region->FLLock); 280 pushBatchClassBlocks(Region, Array, Size); 281 if (conditionVariableEnabled()) 282 Region->FLLockCV.notifyAll(Region->FLLock); 283 return; 284 } 285 286 // TODO(chiahungduan): Consider not doing grouping if the group size is not 287 // greater than the block size with a certain scale. 288 289 bool SameGroup = true; 290 if (GroupSizeLog < RegionSizeLog) { 291 // Sort the blocks so that blocks belonging to the same group can be 292 // pushed together. 
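// The sort key below is the compact group base, i.e. the compact pointer
// with its low GroupScale bits cleared. A minimal illustration with made-up
// values: for GroupScale == 14, compact pointers 0x4001, 0x4002 and 0x8001
// map to groups 0x4000, 0x4000 and 0x8000 respectively, so the first two
// stay adjacent and SameGroup becomes false only because the third block
// lives in a different group.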
293 for (u32 I = 1; I < Size; ++I) { 294 if (compactPtrGroup(Array[I - 1]) != compactPtrGroup(Array[I])) 295 SameGroup = false; 296 CompactPtrT Cur = Array[I]; 297 u32 J = I; 298 while (J > 0 && compactPtrGroup(Cur) < compactPtrGroup(Array[J - 1])) { 299 Array[J] = Array[J - 1]; 300 --J; 301 } 302 Array[J] = Cur; 303 } 304 } 305 306 { 307 ScopedLock L(Region->FLLock); 308 pushBlocksImpl(C, ClassId, Region, Array, Size, SameGroup); 309 if (conditionVariableEnabled()) 310 Region->FLLockCV.notifyAll(Region->FLLock); 311 } 312 } 313 disable()314 void disable() NO_THREAD_SAFETY_ANALYSIS { 315 // The BatchClassId must be locked last since other classes can use it. 316 for (sptr I = static_cast<sptr>(NumClasses) - 1; I >= 0; I--) { 317 if (static_cast<uptr>(I) == SizeClassMap::BatchClassId) 318 continue; 319 getRegionInfo(static_cast<uptr>(I))->MMLock.lock(); 320 getRegionInfo(static_cast<uptr>(I))->FLLock.lock(); 321 } 322 getRegionInfo(SizeClassMap::BatchClassId)->MMLock.lock(); 323 getRegionInfo(SizeClassMap::BatchClassId)->FLLock.lock(); 324 } 325 enable()326 void enable() NO_THREAD_SAFETY_ANALYSIS { 327 getRegionInfo(SizeClassMap::BatchClassId)->FLLock.unlock(); 328 getRegionInfo(SizeClassMap::BatchClassId)->MMLock.unlock(); 329 for (uptr I = 0; I < NumClasses; I++) { 330 if (I == SizeClassMap::BatchClassId) 331 continue; 332 getRegionInfo(I)->FLLock.unlock(); 333 getRegionInfo(I)->MMLock.unlock(); 334 } 335 } 336 iterateOverBlocks(F Callback)337 template <typename F> void iterateOverBlocks(F Callback) { 338 for (uptr I = 0; I < NumClasses; I++) { 339 if (I == SizeClassMap::BatchClassId) 340 continue; 341 RegionInfo *Region = getRegionInfo(I); 342 // TODO: The call of `iterateOverBlocks` requires disabling 343 // SizeClassAllocator64. We may consider locking each region on demand 344 // only. 345 Region->FLLock.assertHeld(); 346 Region->MMLock.assertHeld(); 347 const uptr BlockSize = getSizeByClassId(I); 348 const uptr From = Region->RegionBeg; 349 const uptr To = From + Region->MemMapInfo.AllocatedUser; 350 for (uptr Block = From; Block < To; Block += BlockSize) 351 Callback(Block); 352 } 353 } 354 getStats(ScopedString * Str)355 void getStats(ScopedString *Str) { 356 // TODO(kostyak): get the RSS per region. 357 uptr TotalMapped = 0; 358 uptr PoppedBlocks = 0; 359 uptr PushedBlocks = 0; 360 for (uptr I = 0; I < NumClasses; I++) { 361 RegionInfo *Region = getRegionInfo(I); 362 { 363 ScopedLock L(Region->MMLock); 364 TotalMapped += Region->MemMapInfo.MappedUser; 365 } 366 { 367 ScopedLock L(Region->FLLock); 368 PoppedBlocks += Region->FreeListInfo.PoppedBlocks; 369 PushedBlocks += Region->FreeListInfo.PushedBlocks; 370 } 371 } 372 const s32 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs); 373 Str->append("Stats: SizeClassAllocator64: %zuM mapped (%uM rss) in %zu " 374 "allocations; remains %zu; ReleaseToOsIntervalMs = %d\n", 375 TotalMapped >> 20, 0U, PoppedBlocks, 376 PoppedBlocks - PushedBlocks, IntervalMs >= 0 ? 
IntervalMs : -1); 377 378 for (uptr I = 0; I < NumClasses; I++) { 379 RegionInfo *Region = getRegionInfo(I); 380 ScopedLock L1(Region->MMLock); 381 ScopedLock L2(Region->FLLock); 382 getStats(Str, I, Region); 383 } 384 } 385 getFragmentationInfo(ScopedString * Str)386 void getFragmentationInfo(ScopedString *Str) { 387 Str->append( 388 "Fragmentation Stats: SizeClassAllocator64: page size = %zu bytes\n", 389 getPageSizeCached()); 390 391 for (uptr I = 1; I < NumClasses; I++) { 392 RegionInfo *Region = getRegionInfo(I); 393 ScopedLock L(Region->MMLock); 394 getRegionFragmentationInfo(Region, I, Str); 395 } 396 } 397 getMemoryGroupFragmentationInfo(ScopedString * Str)398 void getMemoryGroupFragmentationInfo(ScopedString *Str) { 399 Str->append( 400 "Fragmentation Stats: SizeClassAllocator64: page size = %zu bytes\n", 401 getPageSizeCached()); 402 403 for (uptr I = 1; I < NumClasses; I++) { 404 RegionInfo *Region = getRegionInfo(I); 405 ScopedLock L(Region->MMLock); 406 getMemoryGroupFragmentationInfoInRegion(Region, I, Str); 407 } 408 } 409 setOption(Option O,sptr Value)410 bool setOption(Option O, sptr Value) { 411 if (O == Option::ReleaseInterval) { 412 const s32 Interval = Max( 413 Min(static_cast<s32>(Value), Config::getMaxReleaseToOsIntervalMs()), 414 Config::getMinReleaseToOsIntervalMs()); 415 atomic_store_relaxed(&ReleaseToOsIntervalMs, Interval); 416 return true; 417 } 418 // Not supported by the Primary, but not an error either. 419 return true; 420 } 421 tryReleaseToOS(uptr ClassId,ReleaseToOS ReleaseType)422 uptr tryReleaseToOS(uptr ClassId, ReleaseToOS ReleaseType) { 423 RegionInfo *Region = getRegionInfo(ClassId); 424 // Note that the tryLock() may fail spuriously, given that it should rarely 425 // happen and page releasing is fine to skip, we don't take certain 426 // approaches to ensure one page release is done. 
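// A hedged caller-side sketch (the `Allocator` instance name is made up,
// not part of this file):
//
//   // Best-effort: returns 0 if nothing was releasable *or* if MMLock was
//   // contended; either way the caller can simply ignore the result.
//   uptr Freed = Allocator.tryReleaseToOS(ClassId, ReleaseToOS::Normal);
//
// releaseToOS() below, by contrast, walks every class and always waits for
// the lock.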
427 if (Region->MMLock.tryLock()) { 428 uptr BytesReleased = releaseToOSMaybe(Region, ClassId, ReleaseType); 429 Region->MMLock.unlock(); 430 return BytesReleased; 431 } 432 return 0; 433 } 434 releaseToOS(ReleaseToOS ReleaseType)435 uptr releaseToOS(ReleaseToOS ReleaseType) { 436 uptr TotalReleasedBytes = 0; 437 for (uptr I = 0; I < NumClasses; I++) { 438 if (I == SizeClassMap::BatchClassId) 439 continue; 440 RegionInfo *Region = getRegionInfo(I); 441 ScopedLock L(Region->MMLock); 442 TotalReleasedBytes += releaseToOSMaybe(Region, I, ReleaseType); 443 } 444 return TotalReleasedBytes; 445 } 446 getRegionInfoArrayAddress()447 const char *getRegionInfoArrayAddress() const { 448 return reinterpret_cast<const char *>(RegionInfoArray); 449 } 450 getRegionInfoArraySize()451 static uptr getRegionInfoArraySize() { return sizeof(RegionInfoArray); } 452 getCompactPtrBaseByClassId(uptr ClassId)453 uptr getCompactPtrBaseByClassId(uptr ClassId) { 454 return getRegionInfo(ClassId)->RegionBeg; 455 } 456 compactPtr(uptr ClassId,uptr Ptr)457 CompactPtrT compactPtr(uptr ClassId, uptr Ptr) { 458 DCHECK_LE(ClassId, SizeClassMap::LargestClassId); 459 return compactPtrInternal(getCompactPtrBaseByClassId(ClassId), Ptr); 460 } 461 decompactPtr(uptr ClassId,CompactPtrT CompactPtr)462 void *decompactPtr(uptr ClassId, CompactPtrT CompactPtr) { 463 DCHECK_LE(ClassId, SizeClassMap::LargestClassId); 464 return reinterpret_cast<void *>( 465 decompactPtrInternal(getCompactPtrBaseByClassId(ClassId), CompactPtr)); 466 } 467 findNearestBlock(const char * RegionInfoData,uptr Ptr)468 static BlockInfo findNearestBlock(const char *RegionInfoData, 469 uptr Ptr) NO_THREAD_SAFETY_ANALYSIS { 470 const RegionInfo *RegionInfoArray = 471 reinterpret_cast<const RegionInfo *>(RegionInfoData); 472 473 uptr ClassId; 474 uptr MinDistance = -1UL; 475 for (uptr I = 0; I != NumClasses; ++I) { 476 if (I == SizeClassMap::BatchClassId) 477 continue; 478 uptr Begin = RegionInfoArray[I].RegionBeg; 479 // TODO(chiahungduan): In fact, We need to lock the RegionInfo::MMLock. 480 // However, the RegionInfoData is passed with const qualifier and lock the 481 // mutex requires modifying RegionInfoData, which means we need to remove 482 // the const qualifier. This may lead to another undefined behavior (The 483 // first one is accessing `AllocatedUser` without locking. It's better to 484 // pass `RegionInfoData` as `void *` then we can lock the mutex properly. 
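// The distance logic below, illustrated with invented addresses: for a
// region covering [Begin, End) == [0x1000, 0x2000), Ptr == 0x1800 yields
// RegionDistance == 0 (inside the region), Ptr == 0x2100 yields 0x100
// (past the end) and Ptr == 0x0F00 yields 0x100 (before the start). The
// closest region wins, and only if it lies within 8192 bytes of Ptr.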
485 uptr End = Begin + RegionInfoArray[I].MemMapInfo.AllocatedUser; 486 if (Begin > End || End - Begin < SizeClassMap::getSizeByClassId(I)) 487 continue; 488 uptr RegionDistance; 489 if (Begin <= Ptr) { 490 if (Ptr < End) 491 RegionDistance = 0; 492 else 493 RegionDistance = Ptr - End; 494 } else { 495 RegionDistance = Begin - Ptr; 496 } 497 498 if (RegionDistance < MinDistance) { 499 MinDistance = RegionDistance; 500 ClassId = I; 501 } 502 } 503 504 BlockInfo B = {}; 505 if (MinDistance <= 8192) { 506 B.RegionBegin = RegionInfoArray[ClassId].RegionBeg; 507 B.RegionEnd = 508 B.RegionBegin + RegionInfoArray[ClassId].MemMapInfo.AllocatedUser; 509 B.BlockSize = SizeClassMap::getSizeByClassId(ClassId); 510 B.BlockBegin = 511 B.RegionBegin + uptr(sptr(Ptr - B.RegionBegin) / sptr(B.BlockSize) * 512 sptr(B.BlockSize)); 513 while (B.BlockBegin < B.RegionBegin) 514 B.BlockBegin += B.BlockSize; 515 while (B.RegionEnd < B.BlockBegin + B.BlockSize) 516 B.BlockBegin -= B.BlockSize; 517 } 518 return B; 519 } 520 521 AtomicOptions Options; 522 523 private: 524 static const uptr RegionSize = 1UL << RegionSizeLog; 525 static const uptr NumClasses = SizeClassMap::NumClasses; 526 527 static const uptr MapSizeIncrement = Config::getMapSizeIncrement(); 528 // Fill at most this number of batches from the newly map'd memory. 529 static const u32 MaxNumBatches = SCUDO_ANDROID ? 4U : 8U; 530 531 struct ReleaseToOsInfo { 532 uptr BytesInFreeListAtLastCheckpoint; 533 uptr RangesReleased; 534 uptr LastReleasedBytes; 535 // The minimum size of pushed blocks to trigger page release. 536 uptr TryReleaseThreshold; 537 // The number of bytes not triggering `releaseToOSMaybe()` because of 538 // the length of release interval. 539 uptr PendingPushedBytesDelta; 540 u64 LastReleaseAtNs; 541 }; 542 543 struct BlocksInfo { 544 SinglyLinkedList<BatchGroupT> BlockList = {}; 545 uptr PoppedBlocks = 0; 546 uptr PushedBlocks = 0; 547 }; 548 549 struct PagesInfo { 550 MemMapT MemMap = {}; 551 // Bytes mapped for user memory. 552 uptr MappedUser = 0; 553 // Bytes allocated for user memory. 554 uptr AllocatedUser = 0; 555 }; 556 557 struct UnpaddedRegionInfo { 558 // Mutex for operations on freelist 559 HybridMutex FLLock; 560 ConditionVariableT FLLockCV GUARDED_BY(FLLock); 561 // Mutex for memmap operations 562 HybridMutex MMLock ACQUIRED_BEFORE(FLLock); 563 // `RegionBeg` is initialized before thread creation and won't be changed. 
564 uptr RegionBeg = 0; 565 u32 RandState GUARDED_BY(MMLock) = 0; 566 BlocksInfo FreeListInfo GUARDED_BY(FLLock); 567 PagesInfo MemMapInfo GUARDED_BY(MMLock); 568 ReleaseToOsInfo ReleaseInfo GUARDED_BY(MMLock) = {}; 569 bool Exhausted GUARDED_BY(MMLock) = false; 570 bool isPopulatingFreeList GUARDED_BY(FLLock) = false; 571 }; 572 struct RegionInfo : UnpaddedRegionInfo { 573 char Padding[SCUDO_CACHE_LINE_SIZE - 574 (sizeof(UnpaddedRegionInfo) % SCUDO_CACHE_LINE_SIZE)] = {}; 575 }; 576 static_assert(sizeof(RegionInfo) % SCUDO_CACHE_LINE_SIZE == 0, ""); 577 getRegionInfo(uptr ClassId)578 RegionInfo *getRegionInfo(uptr ClassId) { 579 DCHECK_LT(ClassId, NumClasses); 580 return &RegionInfoArray[ClassId]; 581 } 582 getRegionBaseByClassId(uptr ClassId)583 uptr getRegionBaseByClassId(uptr ClassId) { 584 RegionInfo *Region = getRegionInfo(ClassId); 585 Region->MMLock.assertHeld(); 586 587 if (!Config::getEnableContiguousRegions() && 588 !Region->MemMapInfo.MemMap.isAllocated()) { 589 return 0U; 590 } 591 return Region->MemMapInfo.MemMap.getBase(); 592 } 593 compactPtrInternal(uptr Base,uptr Ptr)594 static CompactPtrT compactPtrInternal(uptr Base, uptr Ptr) { 595 return static_cast<CompactPtrT>((Ptr - Base) >> CompactPtrScale); 596 } 597 decompactPtrInternal(uptr Base,CompactPtrT CompactPtr)598 static uptr decompactPtrInternal(uptr Base, CompactPtrT CompactPtr) { 599 return Base + (static_cast<uptr>(CompactPtr) << CompactPtrScale); 600 } 601 compactPtrGroup(CompactPtrT CompactPtr)602 static uptr compactPtrGroup(CompactPtrT CompactPtr) { 603 const uptr Mask = (static_cast<uptr>(1) << GroupScale) - 1; 604 return static_cast<uptr>(CompactPtr) & ~Mask; 605 } decompactGroupBase(uptr Base,uptr CompactPtrGroupBase)606 static uptr decompactGroupBase(uptr Base, uptr CompactPtrGroupBase) { 607 DCHECK_EQ(CompactPtrGroupBase % (static_cast<uptr>(1) << (GroupScale)), 0U); 608 return Base + (CompactPtrGroupBase << CompactPtrScale); 609 } 610 isSmallBlock(uptr BlockSize)611 ALWAYS_INLINE static bool isSmallBlock(uptr BlockSize) { 612 const uptr PageSize = getPageSizeCached(); 613 return BlockSize < PageSize / 16U; 614 } 615 getMinReleaseAttemptSize(uptr BlockSize)616 ALWAYS_INLINE uptr getMinReleaseAttemptSize(uptr BlockSize) { 617 return roundUp(BlockSize, getPageSizeCached()); 618 } 619 initRegion(RegionInfo * Region,uptr ClassId,MemMapT MemMap,bool EnableRandomOffset)620 ALWAYS_INLINE void initRegion(RegionInfo *Region, uptr ClassId, 621 MemMapT MemMap, bool EnableRandomOffset) 622 REQUIRES(Region->MMLock) { 623 DCHECK(!Region->MemMapInfo.MemMap.isAllocated()); 624 DCHECK(MemMap.isAllocated()); 625 626 const uptr PageSize = getPageSizeCached(); 627 628 Region->MemMapInfo.MemMap = MemMap; 629 630 Region->RegionBeg = MemMap.getBase(); 631 if (EnableRandomOffset) { 632 Region->RegionBeg += 633 (getRandomModN(&Region->RandState, 16) + 1) * PageSize; 634 } 635 636 const uptr BlockSize = getSizeByClassId(ClassId); 637 // Releasing small blocks is expensive, set a higher threshold to avoid 638 // frequent page releases. 
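// For illustration, assuming PageSize == 4096 (so "small" means a block
// under 256 bytes): a 32-byte class gets
//   TryReleaseThreshold = 4096 * SmallerBlockReleasePageDelta
// while a 16 KiB class simply gets
//   getMinReleaseAttemptSize(16384) == roundUp(16384, 4096) == 16384.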
639 if (isSmallBlock(BlockSize)) { 640 Region->ReleaseInfo.TryReleaseThreshold = 641 PageSize * SmallerBlockReleasePageDelta; 642 } else { 643 Region->ReleaseInfo.TryReleaseThreshold = 644 getMinReleaseAttemptSize(BlockSize); 645 } 646 } 647 pushBatchClassBlocks(RegionInfo * Region,CompactPtrT * Array,u32 Size)648 void pushBatchClassBlocks(RegionInfo *Region, CompactPtrT *Array, u32 Size) 649 REQUIRES(Region->FLLock) { 650 DCHECK_EQ(Region, getRegionInfo(SizeClassMap::BatchClassId)); 651 652 // Free blocks are recorded by TransferBatch in freelist for all 653 // size-classes. In addition, TransferBatch is allocated from BatchClassId. 654 // In order not to use additional block to record the free blocks in 655 // BatchClassId, they are self-contained. I.e., A TransferBatch records the 656 // block address of itself. See the figure below: 657 // 658 // TransferBatch at 0xABCD 659 // +----------------------------+ 660 // | Free blocks' addr | 661 // | +------+------+------+ | 662 // | |0xABCD|... |... | | 663 // | +------+------+------+ | 664 // +----------------------------+ 665 // 666 // When we allocate all the free blocks in the TransferBatch, the block used 667 // by TransferBatch is also free for use. We don't need to recycle the 668 // TransferBatch. Note that the correctness is maintained by the invariant, 669 // 670 // Each popBlocks() request returns the entire TransferBatch. Returning 671 // part of the blocks in a TransferBatch is invalid. 672 // 673 // This ensures that TransferBatch won't leak the address itself while it's 674 // still holding other valid data. 675 // 676 // Besides, BatchGroup is also allocated from BatchClassId and has its 677 // address recorded in the TransferBatch too. To maintain the correctness, 678 // 679 // The address of BatchGroup is always recorded in the last TransferBatch 680 // in the freelist (also imply that the freelist should only be 681 // updated with push_front). Once the last TransferBatch is popped, 682 // the block used by BatchGroup is also free for use. 683 // 684 // With this approach, the blocks used by BatchGroup and TransferBatch are 685 // reusable and don't need additional space for them. 686 687 Region->FreeListInfo.PushedBlocks += Size; 688 BatchGroupT *BG = Region->FreeListInfo.BlockList.front(); 689 690 if (BG == nullptr) { 691 // Construct `BatchGroup` on the last element. 692 BG = reinterpret_cast<BatchGroupT *>( 693 decompactPtr(SizeClassMap::BatchClassId, Array[Size - 1])); 694 --Size; 695 BG->Batches.clear(); 696 // BatchClass hasn't enabled memory group. Use `0` to indicate there's no 697 // memory group here. 698 BG->CompactPtrGroupBase = 0; 699 BG->BytesInBGAtLastCheckpoint = 0; 700 BG->MaxCachedPerBatch = 701 CacheT::getMaxCached(getSizeByClassId(SizeClassMap::BatchClassId)); 702 703 Region->FreeListInfo.BlockList.push_front(BG); 704 } 705 706 if (UNLIKELY(Size == 0)) 707 return; 708 709 // This happens under 2 cases. 710 // 1. just allocated a new `BatchGroup`. 711 // 2. Only 1 block is pushed when the freelist is empty. 712 if (BG->Batches.empty()) { 713 // Construct the `TransferBatch` on the last element. 714 TransferBatchT *TB = reinterpret_cast<TransferBatchT *>( 715 decompactPtr(SizeClassMap::BatchClassId, Array[Size - 1])); 716 TB->clear(); 717 // As mentioned above, addresses of `TransferBatch` and `BatchGroup` are 718 // recorded in the TransferBatch. 
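// Concretely (the addresses are invented): if the block at 0xABCD becomes
// the TransferBatch and the BatchGroup sits at 0xBBCD, the two add() calls
// below leave TB holding { compact(0xABCD), compact(0xBBCD) }, so popping
// this last batch hands both metadata blocks back as ordinary free blocks.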
719 TB->add(Array[Size - 1]); 720 TB->add( 721 compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(BG))); 722 --Size; 723 BG->Batches.push_front(TB); 724 } 725 726 TransferBatchT *CurBatch = BG->Batches.front(); 727 DCHECK_NE(CurBatch, nullptr); 728 729 for (u32 I = 0; I < Size;) { 730 u16 UnusedSlots = 731 static_cast<u16>(BG->MaxCachedPerBatch - CurBatch->getCount()); 732 if (UnusedSlots == 0) { 733 CurBatch = reinterpret_cast<TransferBatchT *>( 734 decompactPtr(SizeClassMap::BatchClassId, Array[I])); 735 CurBatch->clear(); 736 // Self-contained 737 CurBatch->add(Array[I]); 738 ++I; 739 // TODO(chiahungduan): Avoid the use of push_back() in `Batches` of 740 // BatchClassId. 741 BG->Batches.push_front(CurBatch); 742 UnusedSlots = static_cast<u16>(BG->MaxCachedPerBatch - 1); 743 } 744 // `UnusedSlots` is u16 so the result will be also fit in u16. 745 const u16 AppendSize = static_cast<u16>(Min<u32>(UnusedSlots, Size - I)); 746 CurBatch->appendFromArray(&Array[I], AppendSize); 747 I += AppendSize; 748 } 749 } 750 751 // Push the blocks to their batch group. The layout will be like, 752 // 753 // FreeListInfo.BlockList - > BG -> BG -> BG 754 // | | | 755 // v v v 756 // TB TB TB 757 // | 758 // v 759 // TB 760 // 761 // Each BlockGroup(BG) will associate with unique group id and the free blocks 762 // are managed by a list of TransferBatch(TB). To reduce the time of inserting 763 // blocks, BGs are sorted and the input `Array` are supposed to be sorted so 764 // that we can get better performance of maintaining sorted property. 765 // Use `SameGroup=true` to indicate that all blocks in the array are from the 766 // same group then we will skip checking the group id of each block. 767 void pushBlocksImpl(CacheT *C, uptr ClassId, RegionInfo *Region, 768 CompactPtrT *Array, u32 Size, bool SameGroup = false) 769 REQUIRES(Region->FLLock) { 770 DCHECK_NE(ClassId, SizeClassMap::BatchClassId); 771 DCHECK_GT(Size, 0U); 772 773 auto CreateGroup = [&](uptr CompactPtrGroupBase) { 774 BatchGroupT *BG = 775 reinterpret_cast<BatchGroupT *>(C->getBatchClassBlock()); 776 BG->Batches.clear(); 777 TransferBatchT *TB = 778 reinterpret_cast<TransferBatchT *>(C->getBatchClassBlock()); 779 TB->clear(); 780 781 BG->CompactPtrGroupBase = CompactPtrGroupBase; 782 BG->Batches.push_front(TB); 783 BG->BytesInBGAtLastCheckpoint = 0; 784 BG->MaxCachedPerBatch = TransferBatchT::MaxNumCached; 785 786 return BG; 787 }; 788 789 auto InsertBlocks = [&](BatchGroupT *BG, CompactPtrT *Array, u32 Size) { 790 SinglyLinkedList<TransferBatchT> &Batches = BG->Batches; 791 TransferBatchT *CurBatch = Batches.front(); 792 DCHECK_NE(CurBatch, nullptr); 793 794 for (u32 I = 0; I < Size;) { 795 DCHECK_GE(BG->MaxCachedPerBatch, CurBatch->getCount()); 796 u16 UnusedSlots = 797 static_cast<u16>(BG->MaxCachedPerBatch - CurBatch->getCount()); 798 if (UnusedSlots == 0) { 799 CurBatch = 800 reinterpret_cast<TransferBatchT *>(C->getBatchClassBlock()); 801 CurBatch->clear(); 802 Batches.push_front(CurBatch); 803 UnusedSlots = BG->MaxCachedPerBatch; 804 } 805 // `UnusedSlots` is u16 so the result will be also fit in u16. 806 u16 AppendSize = static_cast<u16>(Min<u32>(UnusedSlots, Size - I)); 807 CurBatch->appendFromArray(&Array[I], AppendSize); 808 I += AppendSize; 809 } 810 }; 811 812 Region->FreeListInfo.PushedBlocks += Size; 813 BatchGroupT *Cur = Region->FreeListInfo.BlockList.front(); 814 815 // In the following, `Cur` always points to the BatchGroup for blocks that 816 // will be pushed next. `Prev` is the element right before `Cur`. 
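// A short walk-through with invented group bases: if BlockList currently
// holds groups [0x0, 0x40000] and the incoming blocks belong to group
// 0x20000, the scan below stops with Prev on the 0x0 group and Cur on the
// 0x40000 group; since the bases differ, a fresh BatchGroup for 0x20000 is
// created and inserted right after Prev, keeping the list sorted.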
817 BatchGroupT *Prev = nullptr; 818 819 while (Cur != nullptr && 820 compactPtrGroup(Array[0]) > Cur->CompactPtrGroupBase) { 821 Prev = Cur; 822 Cur = Cur->Next; 823 } 824 825 if (Cur == nullptr || 826 compactPtrGroup(Array[0]) != Cur->CompactPtrGroupBase) { 827 Cur = CreateGroup(compactPtrGroup(Array[0])); 828 if (Prev == nullptr) 829 Region->FreeListInfo.BlockList.push_front(Cur); 830 else 831 Region->FreeListInfo.BlockList.insert(Prev, Cur); 832 } 833 834 // All the blocks are from the same group, just push without checking group 835 // id. 836 if (SameGroup) { 837 for (u32 I = 0; I < Size; ++I) 838 DCHECK_EQ(compactPtrGroup(Array[I]), Cur->CompactPtrGroupBase); 839 840 InsertBlocks(Cur, Array, Size); 841 return; 842 } 843 844 // The blocks are sorted by group id. Determine the segment of group and 845 // push them to their group together. 846 u32 Count = 1; 847 for (u32 I = 1; I < Size; ++I) { 848 if (compactPtrGroup(Array[I - 1]) != compactPtrGroup(Array[I])) { 849 DCHECK_EQ(compactPtrGroup(Array[I - 1]), Cur->CompactPtrGroupBase); 850 InsertBlocks(Cur, Array + I - Count, Count); 851 852 while (Cur != nullptr && 853 compactPtrGroup(Array[I]) > Cur->CompactPtrGroupBase) { 854 Prev = Cur; 855 Cur = Cur->Next; 856 } 857 858 if (Cur == nullptr || 859 compactPtrGroup(Array[I]) != Cur->CompactPtrGroupBase) { 860 Cur = CreateGroup(compactPtrGroup(Array[I])); 861 DCHECK_NE(Prev, nullptr); 862 Region->FreeListInfo.BlockList.insert(Prev, Cur); 863 } 864 865 Count = 1; 866 } else { 867 ++Count; 868 } 869 } 870 871 InsertBlocks(Cur, Array + Size - Count, Count); 872 } 873 popBlocksWithCV(CacheT * C,uptr ClassId,RegionInfo * Region,CompactPtrT * ToArray,const u16 MaxBlockCount,bool & ReportRegionExhausted)874 u16 popBlocksWithCV(CacheT *C, uptr ClassId, RegionInfo *Region, 875 CompactPtrT *ToArray, const u16 MaxBlockCount, 876 bool &ReportRegionExhausted) { 877 u16 PopCount = 0; 878 879 while (true) { 880 // We only expect one thread doing the freelist refillment and other 881 // threads will be waiting for either the completion of the 882 // `populateFreeListAndPopBlocks()` or `pushBlocks()` called by other 883 // threads. 884 bool PopulateFreeList = false; 885 { 886 ScopedLock FL(Region->FLLock); 887 if (!Region->isPopulatingFreeList) { 888 Region->isPopulatingFreeList = true; 889 PopulateFreeList = true; 890 } 891 } 892 893 if (PopulateFreeList) { 894 ScopedLock ML(Region->MMLock); 895 896 const bool RegionIsExhausted = Region->Exhausted; 897 if (!RegionIsExhausted) { 898 PopCount = populateFreeListAndPopBlocks(C, ClassId, Region, ToArray, 899 MaxBlockCount); 900 } 901 ReportRegionExhausted = !RegionIsExhausted && Region->Exhausted; 902 903 { 904 // Before reacquiring the `FLLock`, the freelist may be used up again 905 // and some threads are waiting for the freelist refillment by the 906 // current thread. It's important to set 907 // `Region->isPopulatingFreeList` to false so the threads about to 908 // sleep will notice the status change. 909 ScopedLock FL(Region->FLLock); 910 Region->isPopulatingFreeList = false; 911 Region->FLLockCV.notifyAll(Region->FLLock); 912 } 913 914 break; 915 } 916 917 // At here, there are two preconditions to be met before waiting, 918 // 1. The freelist is empty. 919 // 2. Region->isPopulatingFreeList == true, i.e, someone is still doing 920 // `populateFreeListAndPopBlocks()`. 
921 // 922 // Note that it has the chance that freelist is empty but 923 // Region->isPopulatingFreeList == false because all the new populated 924 // blocks were used up right after the refillment. Therefore, we have to 925 // check if someone is still populating the freelist. 926 ScopedLock FL(Region->FLLock); 927 PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount); 928 if (PopCount != 0U) 929 break; 930 931 if (!Region->isPopulatingFreeList) 932 continue; 933 934 // Now the freelist is empty and someone's doing the refillment. We will 935 // wait until anyone refills the freelist or someone finishes doing 936 // `populateFreeListAndPopBlocks()`. The refillment can be done by 937 // `populateFreeListAndPopBlocks()`, `pushBlocks()`, 938 // `pushBatchClassBlocks()` and `mergeGroupsToReleaseBack()`. 939 Region->FLLockCV.wait(Region->FLLock); 940 941 PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount); 942 if (PopCount != 0U) 943 break; 944 } 945 946 return PopCount; 947 } 948 popBlocksImpl(CacheT * C,uptr ClassId,RegionInfo * Region,CompactPtrT * ToArray,const u16 MaxBlockCount)949 u16 popBlocksImpl(CacheT *C, uptr ClassId, RegionInfo *Region, 950 CompactPtrT *ToArray, const u16 MaxBlockCount) 951 REQUIRES(Region->FLLock) { 952 if (Region->FreeListInfo.BlockList.empty()) 953 return 0U; 954 955 SinglyLinkedList<TransferBatchT> &Batches = 956 Region->FreeListInfo.BlockList.front()->Batches; 957 958 if (Batches.empty()) { 959 DCHECK_EQ(ClassId, SizeClassMap::BatchClassId); 960 BatchGroupT *BG = Region->FreeListInfo.BlockList.front(); 961 Region->FreeListInfo.BlockList.pop_front(); 962 963 // Block used by `BatchGroup` is from BatchClassId. Turn the block into 964 // `TransferBatch` with single block. 965 TransferBatchT *TB = reinterpret_cast<TransferBatchT *>(BG); 966 ToArray[0] = 967 compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(TB)); 968 Region->FreeListInfo.PoppedBlocks += 1; 969 return 1U; 970 } 971 972 // So far, instead of always filling blocks to `MaxBlockCount`, we only 973 // examine single `TransferBatch` to minimize the time spent in the primary 974 // allocator. Besides, the sizes of `TransferBatch` and 975 // `CacheT::getMaxCached()` may also impact the time spent on accessing the 976 // primary allocator. 977 // TODO(chiahungduan): Evaluate if we want to always prepare `MaxBlockCount` 978 // blocks and/or adjust the size of `TransferBatch` according to 979 // `CacheT::getMaxCached()`. 980 TransferBatchT *B = Batches.front(); 981 DCHECK_NE(B, nullptr); 982 DCHECK_GT(B->getCount(), 0U); 983 984 // BachClassId should always take all blocks in the TransferBatch. Read the 985 // comment in `pushBatchClassBlocks()` for more details. 986 const u16 PopCount = ClassId == SizeClassMap::BatchClassId 987 ? B->getCount() 988 : Min(MaxBlockCount, B->getCount()); 989 B->moveNToArray(ToArray, PopCount); 990 991 // TODO(chiahungduan): The deallocation of unused BatchClassId blocks can be 992 // done without holding `FLLock`. 993 if (B->empty()) { 994 Batches.pop_front(); 995 // `TransferBatch` of BatchClassId is self-contained, no need to 996 // deallocate. Read the comment in `pushBatchClassBlocks()` for more 997 // details. 
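// For example (counts are illustrative): with MaxBlockCount == 14 and a
// front TransferBatch holding 10 blocks, a regular class pops all 10 and
// then frees the emptied batch below, whereas for BatchClassId the batch's
// own block was already handed out as one of the popped blocks, so nothing
// is deallocated.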
998 if (ClassId != SizeClassMap::BatchClassId) 999 C->deallocate(SizeClassMap::BatchClassId, B); 1000 1001 if (Batches.empty()) { 1002 BatchGroupT *BG = Region->FreeListInfo.BlockList.front(); 1003 Region->FreeListInfo.BlockList.pop_front(); 1004 1005 // We don't keep BatchGroup with zero blocks to avoid empty-checking 1006 // while allocating. Note that block used for constructing BatchGroup is 1007 // recorded as free blocks in the last element of BatchGroup::Batches. 1008 // Which means, once we pop the last TransferBatch, the block is 1009 // implicitly deallocated. 1010 if (ClassId != SizeClassMap::BatchClassId) 1011 C->deallocate(SizeClassMap::BatchClassId, BG); 1012 } 1013 } 1014 1015 Region->FreeListInfo.PoppedBlocks += PopCount; 1016 1017 return PopCount; 1018 } 1019 populateFreeListAndPopBlocks(CacheT * C,uptr ClassId,RegionInfo * Region,CompactPtrT * ToArray,const u16 MaxBlockCount)1020 NOINLINE u16 populateFreeListAndPopBlocks(CacheT *C, uptr ClassId, 1021 RegionInfo *Region, 1022 CompactPtrT *ToArray, 1023 const u16 MaxBlockCount) 1024 REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) { 1025 if (!Config::getEnableContiguousRegions() && 1026 !Region->MemMapInfo.MemMap.isAllocated()) { 1027 ReservedMemoryT ReservedMemory; 1028 if (UNLIKELY(!ReservedMemory.create(/*Addr=*/0U, RegionSize, 1029 "scudo:primary_reserve", 1030 MAP_ALLOWNOMEM))) { 1031 Printf("Can't reserve pages for size class %zu.\n", 1032 getSizeByClassId(ClassId)); 1033 return 0U; 1034 } 1035 initRegion(Region, ClassId, 1036 ReservedMemory.dispatch(ReservedMemory.getBase(), 1037 ReservedMemory.getCapacity()), 1038 /*EnableRandomOffset=*/false); 1039 } 1040 1041 DCHECK(Region->MemMapInfo.MemMap.isAllocated()); 1042 const uptr Size = getSizeByClassId(ClassId); 1043 const u16 MaxCount = CacheT::getMaxCached(Size); 1044 const uptr RegionBeg = Region->RegionBeg; 1045 const uptr MappedUser = Region->MemMapInfo.MappedUser; 1046 const uptr TotalUserBytes = 1047 Region->MemMapInfo.AllocatedUser + MaxCount * Size; 1048 // Map more space for blocks, if necessary. 1049 if (TotalUserBytes > MappedUser) { 1050 // Do the mmap for the user memory. 1051 const uptr MapSize = 1052 roundUp(TotalUserBytes - MappedUser, MapSizeIncrement); 1053 const uptr RegionBase = RegionBeg - getRegionBaseByClassId(ClassId); 1054 if (UNLIKELY(RegionBase + MappedUser + MapSize > RegionSize)) { 1055 Region->Exhausted = true; 1056 return 0U; 1057 } 1058 1059 if (UNLIKELY(!Region->MemMapInfo.MemMap.remap( 1060 RegionBeg + MappedUser, MapSize, "scudo:primary", 1061 MAP_ALLOWNOMEM | MAP_RESIZABLE | 1062 (useMemoryTagging<Config>(Options.load()) ? 
MAP_MEMTAG 1063 : 0)))) { 1064 return 0U; 1065 } 1066 Region->MemMapInfo.MappedUser += MapSize; 1067 C->getStats().add(StatMapped, MapSize); 1068 } 1069 1070 const u32 NumberOfBlocks = 1071 Min(MaxNumBatches * MaxCount, 1072 static_cast<u32>((Region->MemMapInfo.MappedUser - 1073 Region->MemMapInfo.AllocatedUser) / 1074 Size)); 1075 DCHECK_GT(NumberOfBlocks, 0); 1076 1077 constexpr u32 ShuffleArraySize = 1078 MaxNumBatches * TransferBatchT::MaxNumCached; 1079 CompactPtrT ShuffleArray[ShuffleArraySize]; 1080 DCHECK_LE(NumberOfBlocks, ShuffleArraySize); 1081 1082 const uptr CompactPtrBase = getCompactPtrBaseByClassId(ClassId); 1083 uptr P = RegionBeg + Region->MemMapInfo.AllocatedUser; 1084 for (u32 I = 0; I < NumberOfBlocks; I++, P += Size) 1085 ShuffleArray[I] = compactPtrInternal(CompactPtrBase, P); 1086 1087 ScopedLock L(Region->FLLock); 1088 1089 if (ClassId != SizeClassMap::BatchClassId) { 1090 u32 N = 1; 1091 uptr CurGroup = compactPtrGroup(ShuffleArray[0]); 1092 for (u32 I = 1; I < NumberOfBlocks; I++) { 1093 if (UNLIKELY(compactPtrGroup(ShuffleArray[I]) != CurGroup)) { 1094 shuffle(ShuffleArray + I - N, N, &Region->RandState); 1095 pushBlocksImpl(C, ClassId, Region, ShuffleArray + I - N, N, 1096 /*SameGroup=*/true); 1097 N = 1; 1098 CurGroup = compactPtrGroup(ShuffleArray[I]); 1099 } else { 1100 ++N; 1101 } 1102 } 1103 1104 shuffle(ShuffleArray + NumberOfBlocks - N, N, &Region->RandState); 1105 pushBlocksImpl(C, ClassId, Region, &ShuffleArray[NumberOfBlocks - N], N, 1106 /*SameGroup=*/true); 1107 } else { 1108 pushBatchClassBlocks(Region, ShuffleArray, NumberOfBlocks); 1109 } 1110 1111 const u16 PopCount = 1112 popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount); 1113 DCHECK_NE(PopCount, 0U); 1114 1115 // Note that `PushedBlocks` and `PoppedBlocks` are supposed to only record 1116 // the requests from `PushBlocks` and `PopBatch` which are external 1117 // interfaces. `populateFreeListAndPopBlocks` is the internal interface so 1118 // we should set the values back to avoid incorrectly setting the stats. 1119 Region->FreeListInfo.PushedBlocks -= NumberOfBlocks; 1120 1121 const uptr AllocatedUser = Size * NumberOfBlocks; 1122 C->getStats().add(StatFree, AllocatedUser); 1123 Region->MemMapInfo.AllocatedUser += AllocatedUser; 1124 1125 return PopCount; 1126 } 1127 getStats(ScopedString * Str,uptr ClassId,RegionInfo * Region)1128 void getStats(ScopedString *Str, uptr ClassId, RegionInfo *Region) 1129 REQUIRES(Region->MMLock, Region->FLLock) { 1130 if (Region->MemMapInfo.MappedUser == 0) 1131 return; 1132 const uptr BlockSize = getSizeByClassId(ClassId); 1133 const uptr InUseBlocks = 1134 Region->FreeListInfo.PoppedBlocks - Region->FreeListInfo.PushedBlocks; 1135 const uptr BytesInFreeList = 1136 Region->MemMapInfo.AllocatedUser - InUseBlocks * BlockSize; 1137 uptr RegionPushedBytesDelta = 0; 1138 if (BytesInFreeList >= 1139 Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint) { 1140 RegionPushedBytesDelta = 1141 BytesInFreeList - Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint; 1142 } 1143 const uptr TotalChunks = Region->MemMapInfo.AllocatedUser / BlockSize; 1144 Str->append( 1145 "%s %02zu (%6zu): mapped: %6zuK popped: %7zu pushed: %7zu " 1146 "inuse: %6zu total: %6zu releases: %6zu last " 1147 "released: %6zuK latest pushed bytes: %6zuK region: 0x%zx (0x%zx)\n", 1148 Region->Exhausted ? 
"E" : " ", ClassId, getSizeByClassId(ClassId), 1149 Region->MemMapInfo.MappedUser >> 10, Region->FreeListInfo.PoppedBlocks, 1150 Region->FreeListInfo.PushedBlocks, InUseBlocks, TotalChunks, 1151 Region->ReleaseInfo.RangesReleased, 1152 Region->ReleaseInfo.LastReleasedBytes >> 10, 1153 RegionPushedBytesDelta >> 10, Region->RegionBeg, 1154 getRegionBaseByClassId(ClassId)); 1155 } 1156 getRegionFragmentationInfo(RegionInfo * Region,uptr ClassId,ScopedString * Str)1157 void getRegionFragmentationInfo(RegionInfo *Region, uptr ClassId, 1158 ScopedString *Str) REQUIRES(Region->MMLock) { 1159 const uptr BlockSize = getSizeByClassId(ClassId); 1160 const uptr AllocatedUserEnd = 1161 Region->MemMapInfo.AllocatedUser + Region->RegionBeg; 1162 1163 SinglyLinkedList<BatchGroupT> GroupsToRelease; 1164 { 1165 ScopedLock L(Region->FLLock); 1166 GroupsToRelease = Region->FreeListInfo.BlockList; 1167 Region->FreeListInfo.BlockList.clear(); 1168 } 1169 1170 FragmentationRecorder Recorder; 1171 if (!GroupsToRelease.empty()) { 1172 PageReleaseContext Context = 1173 markFreeBlocks(Region, BlockSize, AllocatedUserEnd, 1174 getCompactPtrBaseByClassId(ClassId), GroupsToRelease); 1175 auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; }; 1176 releaseFreeMemoryToOS(Context, Recorder, SkipRegion); 1177 1178 mergeGroupsToReleaseBack(Region, GroupsToRelease); 1179 } 1180 1181 ScopedLock L(Region->FLLock); 1182 const uptr PageSize = getPageSizeCached(); 1183 const uptr TotalBlocks = Region->MemMapInfo.AllocatedUser / BlockSize; 1184 const uptr InUseBlocks = 1185 Region->FreeListInfo.PoppedBlocks - Region->FreeListInfo.PushedBlocks; 1186 const uptr AllocatedPagesCount = 1187 roundUp(Region->MemMapInfo.AllocatedUser, PageSize) / PageSize; 1188 DCHECK_GE(AllocatedPagesCount, Recorder.getReleasedPagesCount()); 1189 const uptr InUsePages = 1190 AllocatedPagesCount - Recorder.getReleasedPagesCount(); 1191 const uptr InUseBytes = InUsePages * PageSize; 1192 1193 uptr Integral; 1194 uptr Fractional; 1195 computePercentage(BlockSize * InUseBlocks, InUseBytes, &Integral, 1196 &Fractional); 1197 Str->append(" %02zu (%6zu): inuse/total blocks: %6zu/%6zu inuse/total " 1198 "pages: %6zu/%6zu inuse bytes: %6zuK util: %3zu.%02zu%%\n", 1199 ClassId, BlockSize, InUseBlocks, TotalBlocks, InUsePages, 1200 AllocatedPagesCount, InUseBytes >> 10, Integral, Fractional); 1201 } 1202 getMemoryGroupFragmentationInfoInRegion(RegionInfo * Region,uptr ClassId,ScopedString * Str)1203 void getMemoryGroupFragmentationInfoInRegion(RegionInfo *Region, uptr ClassId, 1204 ScopedString *Str) 1205 REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) { 1206 const uptr BlockSize = getSizeByClassId(ClassId); 1207 const uptr AllocatedUserEnd = 1208 Region->MemMapInfo.AllocatedUser + Region->RegionBeg; 1209 1210 SinglyLinkedList<BatchGroupT> GroupsToRelease; 1211 { 1212 ScopedLock L(Region->FLLock); 1213 GroupsToRelease = Region->FreeListInfo.BlockList; 1214 Region->FreeListInfo.BlockList.clear(); 1215 } 1216 1217 constexpr uptr GroupSize = (1UL << GroupSizeLog); 1218 constexpr uptr MaxNumGroups = RegionSize / GroupSize; 1219 1220 MemoryGroupFragmentationRecorder<GroupSize, MaxNumGroups> Recorder; 1221 if (!GroupsToRelease.empty()) { 1222 PageReleaseContext Context = 1223 markFreeBlocks(Region, BlockSize, AllocatedUserEnd, 1224 getCompactPtrBaseByClassId(ClassId), GroupsToRelease); 1225 auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; }; 1226 releaseFreeMemoryToOS(Context, Recorder, SkipRegion); 1227 1228 mergeGroupsToReleaseBack(Region, 
GroupsToRelease); 1229 } 1230 1231 Str->append("MemoryGroupFragmentationInfo in Region %zu (%zu)\n", ClassId, 1232 BlockSize); 1233 1234 const uptr MaxNumGroupsInUse = 1235 roundUp(Region->MemMapInfo.AllocatedUser, GroupSize) / GroupSize; 1236 for (uptr I = 0; I < MaxNumGroupsInUse; ++I) { 1237 uptr Integral; 1238 uptr Fractional; 1239 computePercentage(Recorder.NumPagesInOneGroup - 1240 Recorder.getNumFreePages(I), 1241 Recorder.NumPagesInOneGroup, &Integral, &Fractional); 1242 Str->append("MemoryGroup #%zu (0x%zx): util: %3zu.%02zu%%\n", I, 1243 Region->RegionBeg + I * GroupSize, Integral, Fractional); 1244 } 1245 } 1246 1247 NOINLINE uptr releaseToOSMaybe(RegionInfo *Region, uptr ClassId, 1248 ReleaseToOS ReleaseType = ReleaseToOS::Normal) 1249 REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) { 1250 const uptr BlockSize = getSizeByClassId(ClassId); 1251 uptr BytesInFreeList; 1252 const uptr AllocatedUserEnd = 1253 Region->MemMapInfo.AllocatedUser + Region->RegionBeg; 1254 uptr RegionPushedBytesDelta = 0; 1255 SinglyLinkedList<BatchGroupT> GroupsToRelease; 1256 1257 { 1258 ScopedLock L(Region->FLLock); 1259 1260 BytesInFreeList = Region->MemMapInfo.AllocatedUser - 1261 (Region->FreeListInfo.PoppedBlocks - 1262 Region->FreeListInfo.PushedBlocks) * 1263 BlockSize; 1264 if (UNLIKELY(BytesInFreeList == 0)) 1265 return false; 1266 1267 // ==================================================================== // 1268 // 1. Check if we have enough free blocks and if it's worth doing a page 1269 // release. 1270 // ==================================================================== // 1271 if (ReleaseType != ReleaseToOS::ForceAll && 1272 !hasChanceToReleasePages(Region, BlockSize, BytesInFreeList, 1273 ReleaseType)) { 1274 return 0; 1275 } 1276 1277 // Given that we will unlock the freelist for block operations, cache the 1278 // value here so that when we are adapting the `TryReleaseThreshold` 1279 // later, we are using the right metric. 1280 RegionPushedBytesDelta = 1281 BytesInFreeList - Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint; 1282 1283 // ==================================================================== // 1284 // 2. Determine which groups can release the pages. Use a heuristic to 1285 // gather groups that are candidates for doing a release. 1286 // ==================================================================== // 1287 if (ReleaseType == ReleaseToOS::ForceAll) { 1288 GroupsToRelease = Region->FreeListInfo.BlockList; 1289 Region->FreeListInfo.BlockList.clear(); 1290 } else { 1291 GroupsToRelease = 1292 collectGroupsToRelease(Region, BlockSize, AllocatedUserEnd, 1293 getCompactPtrBaseByClassId(ClassId)); 1294 } 1295 if (GroupsToRelease.empty()) 1296 return 0; 1297 } 1298 1299 // Note that we have extracted the `GroupsToRelease` from region freelist. 1300 // It's safe to let pushBlocks()/popBlocks() access the remaining region 1301 // freelist. In the steps 3 and 4, we will temporarily release the FLLock 1302 // and lock it again before step 5. 1303 1304 // ==================================================================== // 1305 // 3. Mark the free blocks in `GroupsToRelease` in the `PageReleaseContext`. 1306 // Then we can tell which pages are in-use by querying 1307 // `PageReleaseContext`. 
1308 // ==================================================================== // 1309 PageReleaseContext Context = 1310 markFreeBlocks(Region, BlockSize, AllocatedUserEnd, 1311 getCompactPtrBaseByClassId(ClassId), GroupsToRelease); 1312 if (UNLIKELY(!Context.hasBlockMarked())) { 1313 mergeGroupsToReleaseBack(Region, GroupsToRelease); 1314 return 0; 1315 } 1316 1317 // ==================================================================== // 1318 // 4. Release the unused physical pages back to the OS. 1319 // ==================================================================== // 1320 RegionReleaseRecorder<MemMapT> Recorder(&Region->MemMapInfo.MemMap, 1321 Region->RegionBeg, 1322 Context.getReleaseOffset()); 1323 auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; }; 1324 releaseFreeMemoryToOS(Context, Recorder, SkipRegion); 1325 if (Recorder.getReleasedRangesCount() > 0) { 1326 // This is the case that we didn't hit the release threshold but it has 1327 // been past a certain period of time. Thus we try to release some pages 1328 // and if it does release some additional pages, it's hint that we are 1329 // able to lower the threshold. Currently, this case happens when the 1330 // `RegionPushedBytesDelta` is over half of the `TryReleaseThreshold`. As 1331 // a result, we shrink the threshold to half accordingly. 1332 // TODO(chiahungduan): Apply the same adjustment strategy to small blocks. 1333 if (!isSmallBlock(BlockSize)) { 1334 if (RegionPushedBytesDelta < Region->ReleaseInfo.TryReleaseThreshold && 1335 Recorder.getReleasedBytes() > 1336 Region->ReleaseInfo.LastReleasedBytes + 1337 getMinReleaseAttemptSize(BlockSize)) { 1338 Region->ReleaseInfo.TryReleaseThreshold = 1339 Max(Region->ReleaseInfo.TryReleaseThreshold / 2, 1340 getMinReleaseAttemptSize(BlockSize)); 1341 } 1342 } 1343 1344 Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList; 1345 Region->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount(); 1346 Region->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes(); 1347 } 1348 Region->ReleaseInfo.LastReleaseAtNs = getMonotonicTimeFast(); 1349 1350 if (Region->ReleaseInfo.PendingPushedBytesDelta > 0) { 1351 // Instead of increasing the threshold by the amount of 1352 // `PendingPushedBytesDelta`, we only increase half of the amount so that 1353 // it won't be a leap (which may lead to higher memory pressure) because 1354 // of certain memory usage bursts which don't happen frequently. 1355 Region->ReleaseInfo.TryReleaseThreshold += 1356 Region->ReleaseInfo.PendingPushedBytesDelta / 2; 1357 // This is another guard of avoiding the growth of threshold indefinitely. 1358 // Note that we may consider to make this configurable if we have a better 1359 // way to model this. 1360 Region->ReleaseInfo.TryReleaseThreshold = Min<uptr>( 1361 Region->ReleaseInfo.TryReleaseThreshold, (1UL << GroupSizeLog) / 2); 1362 Region->ReleaseInfo.PendingPushedBytesDelta = 0; 1363 } 1364 1365 // ====================================================================== // 1366 // 5. Merge the `GroupsToRelease` back to the freelist. 
1367 // ====================================================================== // 1368 mergeGroupsToReleaseBack(Region, GroupsToRelease); 1369 1370 return Recorder.getReleasedBytes(); 1371 } 1372 hasChanceToReleasePages(RegionInfo * Region,uptr BlockSize,uptr BytesInFreeList,ReleaseToOS ReleaseType)1373 bool hasChanceToReleasePages(RegionInfo *Region, uptr BlockSize, 1374 uptr BytesInFreeList, ReleaseToOS ReleaseType) 1375 REQUIRES(Region->MMLock, Region->FLLock) { 1376 DCHECK_GE(Region->FreeListInfo.PoppedBlocks, 1377 Region->FreeListInfo.PushedBlocks); 1378 // Always update `BytesInFreeListAtLastCheckpoint` with the smallest value 1379 // so that we won't underestimate the releasable pages. For example, the 1380 // following is the region usage, 1381 // 1382 // BytesInFreeListAtLastCheckpoint AllocatedUser 1383 // v v 1384 // |---------------------------------------> 1385 // ^ ^ 1386 // BytesInFreeList ReleaseThreshold 1387 // 1388 // In general, if we have collected enough bytes and the amount of free 1389 // bytes meets the ReleaseThreshold, we will try to do page release. If we 1390 // don't update `BytesInFreeListAtLastCheckpoint` when the current 1391 // `BytesInFreeList` is smaller, we may take longer time to wait for enough 1392 // freed blocks because we miss the bytes between 1393 // (BytesInFreeListAtLastCheckpoint - BytesInFreeList). 1394 if (BytesInFreeList <= 1395 Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint) { 1396 Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList; 1397 } 1398 1399 const uptr RegionPushedBytesDelta = 1400 BytesInFreeList - Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint; 1401 1402 if (ReleaseType == ReleaseToOS::Normal) { 1403 if (RegionPushedBytesDelta < Region->ReleaseInfo.TryReleaseThreshold / 2) 1404 return false; 1405 1406 const s64 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs); 1407 if (IntervalMs < 0) 1408 return false; 1409 1410 const u64 IntervalNs = static_cast<u64>(IntervalMs) * 1000000; 1411 const u64 CurTimeNs = getMonotonicTimeFast(); 1412 const u64 DiffSinceLastReleaseNs = 1413 CurTimeNs - Region->ReleaseInfo.LastReleaseAtNs; 1414 1415 // At here, `RegionPushedBytesDelta` is more than half of 1416 // `TryReleaseThreshold`. If the last release happened 2 release interval 1417 // before, we will still try to see if there's any chance to release some 1418 // memory even it doesn't exceed the threshold. 1419 if (RegionPushedBytesDelta < Region->ReleaseInfo.TryReleaseThreshold) { 1420 // We want the threshold to have a shorter response time to the variant 1421 // memory usage patterns. According to data collected during experiments 1422 // (which were done with 1, 2, 4, 8 intervals), `2` strikes the better 1423 // balance between the memory usage and number of page release attempts. 1424 if (DiffSinceLastReleaseNs < 2 * IntervalNs) 1425 return false; 1426 } else if (DiffSinceLastReleaseNs < IntervalNs) { 1427 // In this case, we are over the threshold but we just did some page 1428 // release in the same release interval. This is a hint that we may want 1429 // a higher threshold so that we can release more memory at once. 1430 // `TryReleaseThreshold` will be adjusted according to how many bytes 1431 // are not released, i.e., the `PendingPushedBytesdelta` here. 1432 // TODO(chiahungduan): Apply the same adjustment strategy to small 1433 // blocks. 1434 if (!isSmallBlock(BlockSize)) 1435 Region->ReleaseInfo.PendingPushedBytesDelta = RegionPushedBytesDelta; 1436 1437 // Memory was returned recently. 
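// A worked timeline (the IntervalMs value is invented): with
// IntervalMs == 1000, a delta below TryReleaseThreshold only proceeds if
// the last release happened at least 2 seconds ago, while a delta at or
// above the threshold may proceed after 1 second; inside that 1-second
// window we just record the delta in PendingPushedBytesDelta (for
// non-small blocks) and give up here.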

  SinglyLinkedList<BatchGroupT>
  collectGroupsToRelease(RegionInfo *Region, const uptr BlockSize,
                         const uptr AllocatedUserEnd,
                         const uptr CompactPtrBase)
      REQUIRES(Region->MMLock, Region->FLLock) {
    const uptr GroupSize = (1UL << GroupSizeLog);
    const uptr PageSize = getPageSizeCached();
    SinglyLinkedList<BatchGroupT> GroupsToRelease;

    // We are examining each group and will take the minimum distance to the
    // release threshold as the next `TryReleaseThreshold`. Note that if the
    // size of free blocks has reached the release threshold, the distance to
    // the next release will be PageSize * SmallerBlockReleasePageDelta. See
    // the comment on `SmallerBlockReleasePageDelta` for more details.
    uptr MinDistToThreshold = GroupSize;

    for (BatchGroupT *BG = Region->FreeListInfo.BlockList.front(),
                     *Prev = nullptr;
         BG != nullptr;) {
      // Group boundaries are always GroupSize-aligned from the CompactPtr
      // base. The layout of memory groups is like,
      //
      //                        (CompactPtrBase)
      //   #1 CompactPtrGroupBase   #2 CompactPtrGroupBase         ...
      //           |                       |                        |
      //           v                       v                        v
      //           +-----------------------+-----------------------+
      //            \                     / \                     /
      //             ---   GroupSize   ---   ---   GroupSize   ---
      //
      // After decompacting the CompactPtrGroupBase, we expect the alignment
      // property to hold as well.
      const uptr BatchGroupBase =
          decompactGroupBase(CompactPtrBase, BG->CompactPtrGroupBase);
      DCHECK_LE(Region->RegionBeg, BatchGroupBase);
      DCHECK_GE(AllocatedUserEnd, BatchGroupBase);
      DCHECK_EQ((Region->RegionBeg - BatchGroupBase) % GroupSize, 0U);
      // TransferBatches are pushed in front of BG.Batches. The first one may
      // not be full, i.e., it may hold fewer than MaxCachedPerBatch blocks.
      const uptr NumBlocks = (BG->Batches.size() - 1) * BG->MaxCachedPerBatch +
                             BG->Batches.front()->getCount();
      const uptr BytesInBG = NumBlocks * BlockSize;

      if (BytesInBG <= BG->BytesInBGAtLastCheckpoint) {
        BG->BytesInBGAtLastCheckpoint = BytesInBG;
        Prev = BG;
        BG = BG->Next;
        continue;
      }

      const uptr PushedBytesDelta = BytesInBG - BG->BytesInBGAtLastCheckpoint;
      if (PushedBytesDelta < getMinReleaseAttemptSize(BlockSize)) {
        Prev = BG;
        BG = BG->Next;
        continue;
      }

      // Given the randomness property, we try to release the pages only if
      // the bytes used by free blocks exceed a certain proportion of the
      // group size. Note that this heuristic only applies when all the spaces
      // in a BatchGroup are allocated.
      if (isSmallBlock(BlockSize)) {
        const uptr BatchGroupEnd = BatchGroupBase + GroupSize;
        const uptr AllocatedGroupSize = AllocatedUserEnd >= BatchGroupEnd
                                            ? GroupSize
                                            : AllocatedUserEnd - BatchGroupBase;
        const uptr ReleaseThreshold =
            (AllocatedGroupSize * (100 - 1U - BlockSize / 16U)) / 100U;
        const bool HighDensity = BytesInBG >= ReleaseThreshold;
        const bool MayHaveReleasedAll = NumBlocks >= (GroupSize / BlockSize);
        // If all blocks in the group are released, we will do range marking,
        // which is fast. Otherwise, we will wait until we have accumulated a
        // certain amount of free memory.
        const bool ReachReleaseDelta =
            MayHaveReleasedAll
                ? true
                : PushedBytesDelta >= PageSize * SmallerBlockReleasePageDelta;

        if (!HighDensity) {
          DCHECK_LE(BytesInBG, ReleaseThreshold);
          // The following is the usage of a memory group,
          //
          //     BytesInBG             ReleaseThreshold
          //  /             \                 v
          //  +---+---------------------------+-----+
          //  |   |                           |     |
          //  +---+---------------------------+-----+
          //   \     /                              ^
          //  PushedBytesDelta                   GroupEnd
          MinDistToThreshold =
              Min(MinDistToThreshold,
                  ReleaseThreshold - BytesInBG + PushedBytesDelta);
        } else {
          // If it reaches high density at this round, the next time we will
          // try to release is based on SmallerBlockReleasePageDelta.
          MinDistToThreshold =
              Min(MinDistToThreshold, PageSize * SmallerBlockReleasePageDelta);
        }

        if (!HighDensity || !ReachReleaseDelta) {
          Prev = BG;
          BG = BG->Next;
          continue;
        }
      }

      // If `BG` is the first BatchGroupT in the list, we only need to advance
      // `BG` and call FreeListInfo.BlockList::pop_front(). No update is
      // needed for `Prev`.
      //
      //          (BG)   (BG->Next)
      //  Prev     Cur      BG
      //    |       |       |
      //    v       v       v
      //   nil     +--+    +--+
      //           |X | -> |  | -> ...
      //           +--+    +--+
      //
      // Otherwise, `Prev` will be used to extract the `Cur` from the
      // `FreeListInfo.BlockList`.
      //
      //          (BG)   (BG->Next)
      //  Prev     Cur      BG
      //    |       |       |
      //    v       v       v
      //  +--+     +--+    +--+
      //  |  | ->  |X | -> |  | -> ...
      //  +--+     +--+    +--+
      //
      // After FreeListInfo.BlockList::extract(),
      //
      //  Prev     Cur       BG
      //    |       |        |
      //    v       v        v
      //  +--+     +--+     +--+
      //  |  |-+   |X |  +->|  | -> ...
      //  +--+ |   +--+  |  +--+
      //       +---------+
      //
      // Note that we need to advance before pushing this BatchGroup to
      // GroupsToRelease because it's a destructive operation.

      BatchGroupT *Cur = BG;
      BG = BG->Next;

      // Ideally, we may want to update this only after a successful release.
      // However, for smaller blocks, each block marking is a costly operation.
      // Therefore, we update it earlier.
      // TODO: Consider updating this after releasing pages if
      // `ReleaseRecorder` can tell the released bytes in each group.
      Cur->BytesInBGAtLastCheckpoint = BytesInBG;

      if (Prev != nullptr)
        Region->FreeListInfo.BlockList.extract(Prev, Cur);
      else
        Region->FreeListInfo.BlockList.pop_front();
      GroupsToRelease.push_back(Cur);
    }

    // Only small blocks have the adaptive `TryReleaseThreshold`.
    if (isSmallBlock(BlockSize)) {
      // If MinDistToThreshold has not been updated, that means each memory
      // group may have pushed less than a page size. In that case, just set
      // it back to normal.
      if (MinDistToThreshold == GroupSize)
        MinDistToThreshold = PageSize * SmallerBlockReleasePageDelta;
      Region->ReleaseInfo.TryReleaseThreshold = MinDistToThreshold;
    }

    return GroupsToRelease;
  }
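
  // A hedged worked example (guarded out of compilation) of the small-block
  // density threshold used in collectGroupsToRelease() above. The helper name
  // is hypothetical; the formula is the one applied per BatchGroup.
#if 0
  // For BlockSize == 32 and a fully allocated group, this evaluates to
  // AllocatedGroupSize * (100 - 1 - 2) / 100, i.e. ~97% of the group. A group
  // is therefore only considered "high density" once free blocks cover
  // roughly 97% of the allocated bytes in it.
  static uptr smallBlockReleaseThresholdSketch(uptr AllocatedGroupSize,
                                               uptr BlockSize) {
    return (AllocatedGroupSize * (100 - 1U - BlockSize / 16U)) / 100U;
  }
#endif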

  PageReleaseContext
  markFreeBlocks(RegionInfo *Region, const uptr BlockSize,
                 const uptr AllocatedUserEnd, const uptr CompactPtrBase,
                 SinglyLinkedList<BatchGroupT> &GroupsToRelease)
      REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) {
    const uptr GroupSize = (1UL << GroupSizeLog);
    auto DecompactPtr = [CompactPtrBase](CompactPtrT CompactPtr) {
      return decompactPtrInternal(CompactPtrBase, CompactPtr);
    };

    const uptr ReleaseBase = decompactGroupBase(
        CompactPtrBase, GroupsToRelease.front()->CompactPtrGroupBase);
    const uptr LastGroupEnd =
        Min(decompactGroupBase(CompactPtrBase,
                               GroupsToRelease.back()->CompactPtrGroupBase) +
                GroupSize,
            AllocatedUserEnd);
    // The last block may straddle the group boundary. Round up to BlockSize
    // to get the exact range.
    const uptr ReleaseEnd =
        roundUpSlow(LastGroupEnd - Region->RegionBeg, BlockSize) +
        Region->RegionBeg;
    const uptr ReleaseRangeSize = ReleaseEnd - ReleaseBase;
    const uptr ReleaseOffset = ReleaseBase - Region->RegionBeg;

    PageReleaseContext Context(BlockSize, /*NumberOfRegions=*/1U,
                               ReleaseRangeSize, ReleaseOffset);
    // We may not be able to do the page release in the rare case that the
    // PageMap allocation fails.
    if (UNLIKELY(!Context.ensurePageMapAllocated()))
      return Context;

    for (BatchGroupT &BG : GroupsToRelease) {
      const uptr BatchGroupBase =
          decompactGroupBase(CompactPtrBase, BG.CompactPtrGroupBase);
      const uptr BatchGroupEnd = BatchGroupBase + GroupSize;
      const uptr AllocatedGroupSize = AllocatedUserEnd >= BatchGroupEnd
                                          ? GroupSize
                                          : AllocatedUserEnd - BatchGroupBase;
      const uptr BatchGroupUsedEnd = BatchGroupBase + AllocatedGroupSize;
      const bool MayContainLastBlockInRegion =
          BatchGroupUsedEnd == AllocatedUserEnd;
      const bool BlockAlignedWithUsedEnd =
          (BatchGroupUsedEnd - Region->RegionBeg) % BlockSize == 0;

      uptr MaxContainedBlocks = AllocatedGroupSize / BlockSize;
      if (!BlockAlignedWithUsedEnd)
        ++MaxContainedBlocks;

      const uptr NumBlocks = (BG.Batches.size() - 1) * BG.MaxCachedPerBatch +
                             BG.Batches.front()->getCount();

      if (NumBlocks == MaxContainedBlocks) {
        for (const auto &It : BG.Batches) {
          if (&It != BG.Batches.front())
            DCHECK_EQ(It.getCount(), BG.MaxCachedPerBatch);
          for (u16 I = 0; I < It.getCount(); ++I)
            DCHECK_EQ(compactPtrGroup(It.get(I)), BG.CompactPtrGroupBase);
        }

        Context.markRangeAsAllCounted(BatchGroupBase, BatchGroupUsedEnd,
                                      Region->RegionBeg, /*RegionIndex=*/0,
                                      Region->MemMapInfo.AllocatedUser);
      } else {
        DCHECK_LT(NumBlocks, MaxContainedBlocks);
        // Note that we don't always visit blocks in each BatchGroup, so we
        // may miss the chance of releasing certain pages that cross
        // BatchGroups.
        Context.markFreeBlocksInRegion(
            BG.Batches, DecompactPtr, Region->RegionBeg, /*RegionIndex=*/0,
            Region->MemMapInfo.AllocatedUser, MayContainLastBlockInRegion);
      }
    }

    DCHECK(Context.hasBlockMarked());

    return Context;
  }
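
  // A hedged sketch (guarded out of compilation) of the "every block in the
  // group is free" check used in markFreeBlocks() above. When it holds, the
  // whole group can be marked with markRangeAsAllCounted(), which is cheaper
  // than visiting each block. The helper name is hypothetical.
#if 0
  static bool allBlocksFreeSketch(uptr AllocatedGroupSize, uptr BlockSize,
                                  bool BlockAlignedWithUsedEnd,
                                  uptr NumFreeBlocks) {
    uptr MaxContainedBlocks = AllocatedGroupSize / BlockSize;
    // A block straddling the used end still counts as contained in the group.
    if (!BlockAlignedWithUsedEnd)
      ++MaxContainedBlocks;
    return NumFreeBlocks == MaxContainedBlocks;
  }
#endif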

  void mergeGroupsToReleaseBack(RegionInfo *Region,
                                SinglyLinkedList<BatchGroupT> &GroupsToRelease)
      REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) {
    ScopedLock L(Region->FLLock);

    // After merging two freelists, we may have redundant `BatchGroup`s that
    // need to be recycled. The number of unused `BatchGroup`s is expected to
    // be small; the constant below was inferred from real programs.
    constexpr uptr MaxUnusedSize = 8;
    CompactPtrT Blocks[MaxUnusedSize];
    u32 Idx = 0;
    RegionInfo *BatchClassRegion = getRegionInfo(SizeClassMap::BatchClassId);
    // We can't call pushBatchClassBlocks() to recycle the unused `BatchGroup`s
    // when we are manipulating the freelist of `BatchClassRegion`. Instead, we
    // should just push it back to the freelist when we merge two `BatchGroup`s.
    // This logic hasn't been implemented because we don't support releasing
    // pages in `BatchClassRegion` yet.
    DCHECK_NE(BatchClassRegion, Region);

    // Merge GroupsToRelease back to the Region::FreeListInfo.BlockList. Note
    // that both `Region->FreeListInfo.BlockList` and `GroupsToRelease` are
    // sorted.
    for (BatchGroupT *BG = Region->FreeListInfo.BlockList.front(),
                     *Prev = nullptr;
         ;) {
      if (BG == nullptr || GroupsToRelease.empty()) {
        if (!GroupsToRelease.empty())
          Region->FreeListInfo.BlockList.append_back(&GroupsToRelease);
        break;
      }

      DCHECK(!BG->Batches.empty());

      if (BG->CompactPtrGroupBase <
          GroupsToRelease.front()->CompactPtrGroupBase) {
        Prev = BG;
        BG = BG->Next;
        continue;
      }

      BatchGroupT *Cur = GroupsToRelease.front();
      TransferBatchT *UnusedTransferBatch = nullptr;
      GroupsToRelease.pop_front();

      if (BG->CompactPtrGroupBase == Cur->CompactPtrGroupBase) {
        // We have updated `BatchGroup::BytesInBGAtLastCheckpoint` while
        // collecting the `GroupsToRelease`.
        BG->BytesInBGAtLastCheckpoint = Cur->BytesInBGAtLastCheckpoint;
        const uptr MaxCachedPerBatch = BG->MaxCachedPerBatch;

        // Note that the first TransferBatch in each `Batches` may not be full
        // and only the first TransferBatch can have non-full blocks. Thus we
        // have to merge them before appending one to another.
        if (Cur->Batches.front()->getCount() == MaxCachedPerBatch) {
          BG->Batches.append_back(&Cur->Batches);
        } else {
          TransferBatchT *NonFullBatch = Cur->Batches.front();
          Cur->Batches.pop_front();
          const u16 NonFullBatchCount = NonFullBatch->getCount();
          // The remaining Batches in `Cur` are full.
          BG->Batches.append_back(&Cur->Batches);

          if (BG->Batches.front()->getCount() == MaxCachedPerBatch) {
            // Only one non-full TransferBatch, push it to the front.
            BG->Batches.push_front(NonFullBatch);
          } else {
            const u16 NumBlocksToMove = static_cast<u16>(
                Min(static_cast<u16>(MaxCachedPerBatch -
                                     BG->Batches.front()->getCount()),
                    NonFullBatchCount));
            BG->Batches.front()->appendFromTransferBatch(NonFullBatch,
                                                         NumBlocksToMove);
            if (NonFullBatch->isEmpty())
              UnusedTransferBatch = NonFullBatch;
            else
              BG->Batches.push_front(NonFullBatch);
          }
        }

        const u32 NeededSlots = UnusedTransferBatch == nullptr ? 1U : 2U;
        if (UNLIKELY(Idx + NeededSlots > MaxUnusedSize)) {
          ScopedLock L(BatchClassRegion->FLLock);
          pushBatchClassBlocks(BatchClassRegion, Blocks, Idx);
          if (conditionVariableEnabled())
            BatchClassRegion->FLLockCV.notifyAll(BatchClassRegion->FLLock);
          Idx = 0;
        }
        Blocks[Idx++] =
            compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(Cur));
        if (UnusedTransferBatch) {
          Blocks[Idx++] =
              compactPtr(SizeClassMap::BatchClassId,
                         reinterpret_cast<uptr>(UnusedTransferBatch));
        }
        Prev = BG;
        BG = BG->Next;
        continue;
      }

      // At this point, `BG` is the first BatchGroup with a CompactPtrGroupBase
      // larger than the first element in `GroupsToRelease`. We need to insert
      // `GroupsToRelease::front()` (which is `Cur` below) before `BG`.
      //
      //   1. If `Prev` is nullptr, we simply push `Cur` to the front of
      //      FreeListInfo.BlockList.
      //   2. Otherwise, use `insert()` which inserts an element next to
      //      `Prev`.
      //
      // Afterwards, we don't need to advance `BG` because the order between
      // `BG` and the new `GroupsToRelease::front()` hasn't been checked.
      if (Prev == nullptr)
        Region->FreeListInfo.BlockList.push_front(Cur);
      else
        Region->FreeListInfo.BlockList.insert(Prev, Cur);
      DCHECK_EQ(Cur->Next, BG);
      Prev = Cur;
    }

    if (Idx != 0) {
      ScopedLock L(BatchClassRegion->FLLock);
      pushBatchClassBlocks(BatchClassRegion, Blocks, Idx);
      if (conditionVariableEnabled())
        BatchClassRegion->FLLockCV.notifyAll(BatchClassRegion->FLLock);
    }

    if (SCUDO_DEBUG) {
      BatchGroupT *Prev = Region->FreeListInfo.BlockList.front();
      for (BatchGroupT *Cur = Prev->Next; Cur != nullptr;
           Prev = Cur, Cur = Cur->Next) {
        CHECK_LT(Prev->CompactPtrGroupBase, Cur->CompactPtrGroupBase);
      }
    }

    if (conditionVariableEnabled())
      Region->FLLockCV.notifyAll(Region->FLLock);
  }

  // The minimum amount of pushed blocks, measured in pages, required before
  // we try to release pages for a smaller size class.
  uptr SmallerBlockReleasePageDelta = 0;
  atomic_s32 ReleaseToOsIntervalMs = {};
  alignas(SCUDO_CACHE_LINE_SIZE) RegionInfo RegionInfoArray[NumClasses];
};

} // namespace scudo

#endif // SCUDO_PRIMARY64_H_
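
// Appendix: a hedged, self-contained sketch (guarded out of compilation and
// not part of the header above) of the core idea behind
// mergeGroupsToReleaseBack(): merging two singly-linked lists that are both
// sorted by group base, reusing the nodes themselves. The `Node` type and
// `mergeSortedSketch` function are hypothetical simplifications; the real
// code additionally merges the TransferBatches of equal groups and recycles
// emptied ones into the batch class.
#if 0
#include <cstdint>

struct Node {
  uintptr_t GroupBase;
  Node *Next;
};

// Merges the sorted list `ToRelease` back into the sorted list `FreeList`,
// preserving the overall order, and returns the new head.
static Node *mergeSortedSketch(Node *FreeList, Node *ToRelease) {
  Node Head = {0, nullptr};
  Node *Tail = &Head;
  while (FreeList != nullptr && ToRelease != nullptr) {
    // Detach the node with the smaller group base and append it to the tail.
    Node **Smaller = FreeList->GroupBase <= ToRelease->GroupBase ? &FreeList
                                                                 : &ToRelease;
    Tail->Next = *Smaller;
    Tail = *Smaller;
    *Smaller = (*Smaller)->Next;
  }
  // Whichever list still has nodes is already sorted; append it as-is.
  Tail->Next = FreeList != nullptr ? FreeList : ToRelease;
  return Head.Next;
}
#endif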