1 /*
2  * Copyright 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <fcntl.h>
18 // Glibc v2.19 doesn't define these in fcntl.h, so host builds will fail without this include.
19 #if !defined(FALLOC_FL_PUNCH_HOLE) || !defined(FALLOC_FL_KEEP_SIZE)
20 #include <linux/falloc.h>
21 #endif
22 #include <linux/userfaultfd.h>
23 #include <poll.h>
24 #include <sys/ioctl.h>
25 #include <sys/mman.h>
26 #include <sys/resource.h>
27 #include <sys/stat.h>
28 #include <sys/syscall.h>
29 #include <unistd.h>
30 
31 #include <fstream>
32 #include <numeric>
33 #include <string>
34 #include <string_view>
35 #include <vector>
36 
37 #include "android-base/file.h"
38 #include "android-base/parsebool.h"
39 #include "android-base/parseint.h"
40 #include "android-base/properties.h"
41 #include "android-base/strings.h"
42 #include "base/file_utils.h"
43 #include "base/memfd.h"
44 #include "base/quasi_atomic.h"
45 #include "base/systrace.h"
46 #include "base/utils.h"
47 #include "gc/accounting/mod_union_table-inl.h"
48 #include "gc/collector_type.h"
49 #include "gc/reference_processor.h"
50 #include "gc/space/bump_pointer_space.h"
51 #include "gc/task_processor.h"
52 #include "gc/verification-inl.h"
53 #include "jit/jit_code_cache.h"
54 #include "mark_compact-inl.h"
55 #include "mirror/object-refvisitor-inl.h"
56 #include "read_barrier_config.h"
57 #include "scoped_thread_state_change-inl.h"
58 #include "sigchain.h"
59 #include "thread_list.h"
60 
61 #ifdef ART_TARGET_ANDROID
62 #include "android-modules-utils/sdk_level.h"
63 #include "com_android_art.h"
64 #endif
65 
66 #ifndef __BIONIC__
67 #ifndef MREMAP_DONTUNMAP
68 #define MREMAP_DONTUNMAP 4
69 #endif
70 #endif  // __BIONIC__
71 
72 // See aosp/2996596 for where these values came from.
73 #ifndef UFFDIO_COPY_MODE_MMAP_TRYLOCK
74 #define UFFDIO_COPY_MODE_MMAP_TRYLOCK (static_cast<uint64_t>(1) << 63)
75 #endif
76 #ifndef UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK
77 #define UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK (static_cast<uint64_t>(1) << 63)
78 #endif
79 #ifdef ART_TARGET_ANDROID
80 namespace {
81 
82 using ::android::base::GetBoolProperty;
83 using ::android::base::ParseBool;
84 using ::android::base::ParseBoolResult;
85 using ::android::modules::sdklevel::IsAtLeastV;
86 
87 }  // namespace
88 #endif
89 
90 namespace art HIDDEN {
91 
92 static bool HaveMremapDontunmap() {
93   const size_t page_size = GetPageSizeSlow();
94   void* old = mmap(nullptr, page_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
95   CHECK_NE(old, MAP_FAILED);
96   void* addr = mremap(old, page_size, page_size, MREMAP_MAYMOVE | MREMAP_DONTUNMAP, nullptr);
97   CHECK_EQ(munmap(old, page_size), 0);
98   if (addr != MAP_FAILED) {
99     CHECK_EQ(munmap(addr, page_size), 0);
100     return true;
101   } else {
102     return false;
103   }
104 }
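// MREMAP_DONTUNMAP (combined with MREMAP_MAYMOVE) moves the contents of a
// mapping to a new address while leaving the source range mapped; subsequent
// accesses to the source fault as if it were a fresh anonymous mapping, which
// is what makes it possible to intercept them with userfaultfd. The probe
// above simply attempts such a remap on a scratch page and reports whether the
// kernel accepted the flag combination.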
105 
106 static bool gUffdSupportsMmapTrylock = false;
107 // We require the MREMAP_DONTUNMAP functionality of the mremap syscall, which was
108 // introduced in kernel version 5.13 but has been backported to GKI kernels.
109 static bool gHaveMremapDontunmap = IsKernelVersionAtLeast(5, 13) || HaveMremapDontunmap();
110 // Bitmap of features supported by userfaultfd. This is obtained via uffd API ioctl.
111 static uint64_t gUffdFeatures = 0;
112 // Both missing and minor faults on shmem are needed only for minor-fault mode.
113 static constexpr uint64_t kUffdFeaturesForMinorFault =
114     UFFD_FEATURE_MISSING_SHMEM | UFFD_FEATURE_MINOR_SHMEM;
115 static constexpr uint64_t kUffdFeaturesForSigbus = UFFD_FEATURE_SIGBUS;
116 // A region that is more than kBlackDenseRegionThreshold percent live doesn't
117 // need to be compacted, as it is already densely packed.
118 static constexpr uint kBlackDenseRegionThreshold = 95U;
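// For example, with 4 KiB pages a page counts as black-dense only when at
// least 95% of it (roughly 3.9 KiB) holds live data; see PrepareForCompaction()
// for how the prefix of such pages is carved out of the moving space.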
119 // We consider the SIGBUS feature necessary for enabling this GC, as it is superior to the
120 // threading-based implementation with respect to jank. We may want the minor-fault feature
121 // in the future to make jit-code-cache updates (which use shmem) concurrent.
122 bool KernelSupportsUffd() {
123 #ifdef __linux__
124   if (gHaveMremapDontunmap) {
125     int fd = syscall(__NR_userfaultfd, O_CLOEXEC | UFFD_USER_MODE_ONLY);
126     // On non-android devices we may not have the kernel patches that restrict
127     // userfaultfd to user mode. But that is not a security concern as we are
128     // on host. Therefore, attempt one more time without UFFD_USER_MODE_ONLY.
129     if (!kIsTargetAndroid && fd == -1 && errno == EINVAL) {
130       fd = syscall(__NR_userfaultfd, O_CLOEXEC);
131     }
132     if (fd >= 0) {
133       // We are only fetching the available features, which is returned by the
134       // ioctl.
135       struct uffdio_api api = {.api = UFFD_API, .features = 0, .ioctls = 0};
136       CHECK_EQ(ioctl(fd, UFFDIO_API, &api), 0) << "ioctl_userfaultfd : API:" << strerror(errno);
137       gUffdFeatures = api.features;
138       // MMAP_TRYLOCK is available only in the 5.10 and 5.15 GKI kernels. Higher
139       // versions have per-VMA locks instead, and lower ones don't support
140       // userfaultfd.
141       if (kIsTargetAndroid && !IsKernelVersionAtLeast(5, 16)) {
142         // Check if MMAP_TRYLOCK feature is supported
143         const size_t page_size = GetPageSizeSlow();
144         void* mem =
145             mmap(nullptr, page_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
146         CHECK_NE(mem, MAP_FAILED) << " errno: " << errno;
147 
148         struct uffdio_zeropage uffd_zeropage;
149         uffd_zeropage.mode = UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK;
150         uffd_zeropage.range.start = reinterpret_cast<uintptr_t>(mem);
151         uffd_zeropage.range.len = page_size;
152         uffd_zeropage.zeropage = 0;
153         // The ioctl will definitely fail as mem is not registered with uffd.
154         CHECK_EQ(ioctl(fd, UFFDIO_ZEROPAGE, &uffd_zeropage), -1);
155         // uffd ioctls return EINVAL for several reasons. By ensuring that 'mem' and
156         // 'len' are properly aligned, we arrange that an EINVAL failure which leaves
157         // uffd_zeropage.zeropage untouched (i.e. still 0) can only mean that the
158         // `trylock` mode isn't available.
159         if (uffd_zeropage.zeropage == 0 && errno == EINVAL) {
160           LOG(INFO) << "MMAP_TRYLOCK is not supported in uffd addr:" << mem
161                     << " page-size:" << page_size;
162         } else {
163           gUffdSupportsMmapTrylock = true;
164           LOG(INFO) << "MMAP_TRYLOCK is supported in uffd errno:" << errno << " addr:" << mem
165                     << " size:" << page_size;
166         }
167         munmap(mem, page_size);
168       }
169       close(fd);
170       // The minimum we need is the SIGBUS feature in order to use userfaultfd.
171       return (api.features & kUffdFeaturesForSigbus) == kUffdFeaturesForSigbus;
172     }
173   }
174 #endif
175   return false;
176 }
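// The probe above performs the UFFDIO_API handshake with features = 0 purely to
// read back the supported-feature bitmap, and then closes the fd. The fd that is
// actually kept for compaction is opened in CreateUserfaultfd() below and does
// its own handshake, requesting only the features it will use, roughly:
//
//   int fd = syscall(__NR_userfaultfd, O_CLOEXEC | UFFD_USER_MODE_ONLY);
//   struct uffdio_api api = {.api = UFFD_API, .features = UFFD_FEATURE_SIGBUS, .ioctls = 0};
//   ioctl(fd, UFFDIO_API, &api);  // enables the requested features on this fd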
177 
178 // The other cases are defined as constexpr in runtime/read_barrier_config.h
179 #if !defined(ART_FORCE_USE_READ_BARRIER) && defined(ART_USE_READ_BARRIER)
180 // Returns collector type asked to be used on the cmdline.
181 static gc::CollectorType FetchCmdlineGcType() {
182   std::string argv;
183   gc::CollectorType gc_type = gc::CollectorType::kCollectorTypeNone;
184   if (android::base::ReadFileToString("/proc/self/cmdline", &argv)) {
185     auto pos = argv.rfind("-Xgc:");
186     if (pos != std::string::npos && argv.substr(pos + 5, 3) == "CMC") {
187       gc_type = gc::CollectorType::kCollectorTypeCMC;
188     } else if (pos != std::string::npos && argv.substr(pos + 5, 2) == "CC") {
189       gc_type = gc::CollectorType::kCollectorTypeCC;
190     }
191   }
192   return gc_type;
193 }
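// For example, a runtime started with "-Xgc:CMC" on its cmdline yields
// kCollectorTypeCMC, "-Xgc:CC" yields kCollectorTypeCC, and anything else
// (including no "-Xgc:" flag at all) leaves the result as kCollectorTypeNone,
// letting ShouldUseUserfaultfd() below fall back to its defaults.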
194 
195 #ifdef ART_TARGET_ANDROID
196 static int GetOverrideCacheInfoFd() {
197   std::string args_str;
198   if (!android::base::ReadFileToString("/proc/self/cmdline", &args_str)) {
199     LOG(WARNING) << "Failed to load /proc/self/cmdline";
200     return -1;
201   }
202   std::vector<std::string_view> args;
203   Split(std::string_view(args_str), /*separator=*/'\0', &args);
204   for (std::string_view arg : args) {
205     if (android::base::ConsumePrefix(&arg, "--cache-info-fd=")) {  // This is a dex2oat flag.
206       int fd;
207       if (!android::base::ParseInt(std::string(arg), &fd)) {
208         LOG(ERROR) << "Failed to parse --cache-info-fd (value: '" << arg << "')";
209         return -1;
210       }
211       return fd;
212     }
213   }
214   return -1;
215 }
216 
217 static std::unordered_map<std::string, std::string> GetCachedProperties() {
218   // For simplicity, we don't handle multiple calls because otherwise we would have to reset the fd.
219   static bool called = false;
220   CHECK(!called) << "GetCachedProperties can be called only once";
221   called = true;
222 
223   std::string cache_info_contents;
224   int fd = GetOverrideCacheInfoFd();
225   if (fd >= 0) {
226     if (!android::base::ReadFdToString(fd, &cache_info_contents)) {
227       PLOG(ERROR) << "Failed to read cache-info from fd " << fd;
228       return {};
229     }
230   } else {
231     std::string path = GetApexDataDalvikCacheDirectory(InstructionSet::kNone) + "/cache-info.xml";
232     if (!android::base::ReadFileToString(path, &cache_info_contents)) {
233       // If the file is not found, then we are in chroot or in a standalone runtime process (e.g.,
234       // IncidentHelper), or odsign/odrefresh failed to generate and sign the cache info. There's
235       // nothing we can do.
236       if (errno != ENOENT) {
237         PLOG(ERROR) << "Failed to read cache-info from the default path";
238       }
239       return {};
240     }
241   }
242 
243   std::optional<com::android::art::CacheInfo> cache_info =
244       com::android::art::parse(cache_info_contents.c_str());
245   if (!cache_info.has_value()) {
246     // This should never happen.
247     LOG(ERROR) << "Failed to parse cache-info";
248     return {};
249   }
250   const com::android::art::KeyValuePairList* list = cache_info->getFirstSystemProperties();
251   if (list == nullptr) {
252     // This should never happen.
253     LOG(ERROR) << "Missing system properties from cache-info";
254     return {};
255   }
256   const std::vector<com::android::art::KeyValuePair>& properties = list->getItem();
257   std::unordered_map<std::string, std::string> result;
258   for (const com::android::art::KeyValuePair& pair : properties) {
259     result[pair.getK()] = pair.getV();
260   }
261   return result;
262 }
263 
264 static bool GetCachedBoolProperty(
265     const std::unordered_map<std::string, std::string>& cached_properties,
266     const std::string& key,
267     bool default_value) {
268   auto it = cached_properties.find(key);
269   if (it == cached_properties.end()) {
270     return default_value;
271   }
272   ParseBoolResult result = ParseBool(it->second);
273   switch (result) {
274     case ParseBoolResult::kTrue:
275       return true;
276     case ParseBoolResult::kFalse:
277       return false;
278     case ParseBoolResult::kError:
279       return default_value;
280   }
281 }
282 
283 static bool SysPropSaysUffdGc() {
284   // The phenotype flag can change at any time after boot, but it shouldn't take effect until a
285   // reboot. Therefore, we read the phenotype flag from the cache info, which is generated on boot.
286   std::unordered_map<std::string, std::string> cached_properties = GetCachedProperties();
287   bool phenotype_enable = GetCachedBoolProperty(
288       cached_properties, "persist.device_config.runtime_native_boot.enable_uffd_gc_2", false);
289   bool phenotype_force_disable = GetCachedBoolProperty(
290       cached_properties, "persist.device_config.runtime_native_boot.force_disable_uffd_gc", false);
291   bool build_enable = GetBoolProperty("ro.dalvik.vm.enable_uffd_gc", false);
292   bool is_at_most_u = !IsAtLeastV();
293   return (phenotype_enable || build_enable || is_at_most_u) && !phenotype_force_disable;
294 }
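// To illustrate the combination above: on Android V or later (is_at_most_u is
// false) with ro.dalvik.vm.enable_uffd_gc unset, the phenotype enable flag alone
// decides; on U and earlier the GC is enabled by default; in every case
// force_disable_uffd_gc wins and turns it off.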
295 #else
296 // Never called.
297 static bool SysPropSaysUffdGc() { return false; }
298 #endif
299 
300 static bool ShouldUseUserfaultfd() {
301   static_assert(kUseBakerReadBarrier || kUseTableLookupReadBarrier);
302 #ifdef __linux__
303   // Use CMC/CC if it is explicitly requested on the cmdline. Otherwise,
304   // always use CC on host. On target, use CMC only if the system properties say so
305   // and the kernel supports it.
306   gc::CollectorType gc_type = FetchCmdlineGcType();
307   return gc_type == gc::CollectorType::kCollectorTypeCMC ||
308          (gc_type == gc::CollectorType::kCollectorTypeNone &&
309           kIsTargetAndroid &&
310           SysPropSaysUffdGc() &&
311           KernelSupportsUffd());
312 #else
313   return false;
314 #endif
315 }
316 
317 const bool gUseUserfaultfd = ShouldUseUserfaultfd();
318 const bool gUseReadBarrier = !gUseUserfaultfd;
319 #endif
320 
321 namespace gc {
322 namespace collector {
323 
324 // Turn off kCheckLocks when profiling the GC as it slows down the GC
325 // significantly.
326 static constexpr bool kCheckLocks = kDebugLocking;
327 static constexpr bool kVerifyRootsMarked = kIsDebugBuild;
328 // Number of compaction buffers reserved for mutator threads in the SIGBUS-feature
329 // case. It's extremely unlikely that we will ever have more than this number
330 // of mutator threads trying to access the moving-space during one compaction
331 // phase.
332 static constexpr size_t kMutatorCompactionBufferCount = 2048;
333 // Minimum from-space chunk to be madvised (during concurrent compaction) in one go.
334 // Choose a reasonable size to avoid making too many batched ioctl and madvise calls.
335 static constexpr ssize_t kMinFromSpaceMadviseSize = 8 * MB;
336 // Concurrent compaction termination logic is different (and slightly more efficient) if the
337 // kernel has the fault-retry feature (allowing repeated faults on the same page), which was
338 // introduced in 5.7 (https://android-review.git.corp.google.com/c/kernel/common/+/1540088).
339 // This allows a single page fault to be handled, in turn, by each worker thread, only waking
340 // up the GC thread at the end.
341 static const bool gKernelHasFaultRetry = IsKernelVersionAtLeast(5, 7);
342 
343 std::pair<bool, bool> MarkCompact::GetUffdAndMinorFault() {
344   bool uffd_available;
345   // In most cases gUffdFeatures will already be initialized at boot time
346   // when libart is loaded. On very old kernels we may get '0' from the kernel,
347   // in which case we would be doing the syscalls each time this function is
348   // called. But that's a very unlikely case. There are no correctness issues as
349   // the response from the kernel never changes after boot.
350   if (UNLIKELY(gUffdFeatures == 0)) {
351     uffd_available = KernelSupportsUffd();
352   } else {
353     // We can have any uffd features only if uffd exists.
354     uffd_available = true;
355   }
356   bool minor_fault_available =
357       (gUffdFeatures & kUffdFeaturesForMinorFault) == kUffdFeaturesForMinorFault;
358   return std::pair<bool, bool>(uffd_available, minor_fault_available);
359 }
360 
361 bool MarkCompact::CreateUserfaultfd(bool post_fork) {
362   if (post_fork || uffd_ == kFdUnused) {
363     // Check if we have MREMAP_DONTUNMAP here for cases where
364     // 'ART_USE_READ_BARRIER=false' is used. Additionally, this check ensures
365     // that userfaultfd isn't used on old kernels, which cause random ioctl
366     // failures.
367     if (gHaveMremapDontunmap) {
368       // Don't use O_NONBLOCK as we rely on read waiting on uffd_ if there isn't
369       // any read event available. We don't use poll.
370       uffd_ = syscall(__NR_userfaultfd, O_CLOEXEC | UFFD_USER_MODE_ONLY);
371       // On non-android devices we may not have the kernel patches that restrict
372       // userfaultfd to user mode. But that is not a security concern as we are
373       // on host. Therefore, attempt one more time without UFFD_USER_MODE_ONLY.
374       if (!kIsTargetAndroid && UNLIKELY(uffd_ == -1 && errno == EINVAL)) {
375         uffd_ = syscall(__NR_userfaultfd, O_CLOEXEC);
376       }
377       if (UNLIKELY(uffd_ == -1)) {
378         uffd_ = kFallbackMode;
379         LOG(WARNING) << "Userfaultfd isn't supported (reason: " << strerror(errno)
380                      << ") and therefore falling back to stop-the-world compaction.";
381       } else {
382         DCHECK(IsValidFd(uffd_));
383         // Initialize uffd with the features which are required and available.
384         // Using private anonymous mapping in threading mode is the default,
385         // for which we don't need to ask for any features. Note: this mode
386         // is not used in production.
387         struct uffdio_api api = {.api = UFFD_API, .features = 0, .ioctls = 0};
388         // We should add the SIGBUS feature only if we plan on using it, as
389         // requesting it here means that threading mode will not work.
390         CHECK_EQ(gUffdFeatures & kUffdFeaturesForSigbus, kUffdFeaturesForSigbus);
391         api.features |= kUffdFeaturesForSigbus;
392         CHECK_EQ(ioctl(uffd_, UFFDIO_API, &api), 0)
393             << "ioctl_userfaultfd: API: " << strerror(errno);
394       }
395     } else {
396       uffd_ = kFallbackMode;
397     }
398   }
399   uffd_initialized_ = !post_fork || uffd_ == kFallbackMode;
400   return IsValidFd(uffd_);
401 }
402 
403 template <size_t kAlignment>
404 MarkCompact::LiveWordsBitmap<kAlignment>* MarkCompact::LiveWordsBitmap<kAlignment>::Create(
405     uintptr_t begin, uintptr_t end) {
406   return static_cast<LiveWordsBitmap<kAlignment>*>(
407           MemRangeBitmap::Create("Concurrent Mark Compact live words bitmap", begin, end));
408 }
409 
410 size_t MarkCompact::ComputeInfoMapSize() {
411   size_t moving_space_size = bump_pointer_space_->Capacity();
412   size_t chunk_info_vec_size = moving_space_size / kOffsetChunkSize;
413   size_t nr_moving_pages = DivideByPageSize(moving_space_size);
414   size_t nr_non_moving_pages = DivideByPageSize(heap_->GetNonMovingSpace()->Capacity());
415   return chunk_info_vec_size * sizeof(uint32_t) + nr_non_moving_pages * sizeof(ObjReference) +
416          nr_moving_pages * (sizeof(ObjReference) + sizeof(uint32_t) + sizeof(Atomic<uint32_t>));
417 }
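// Rough sizing sketch (illustrative numbers only, assuming 4 KiB pages and a
// 256-byte kOffsetChunkSize): a 256 MiB moving space needs 1Mi chunk-info
// entries (4 MiB of uint32_t) plus 64Ki moving-space pages, each contributing
// an ObjReference, a uint32_t offset and an Atomic<uint32_t> status word, plus
// one ObjReference per non-moving-space page; all of it lives in a single
// anonymous mapping.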
418 
419 size_t MarkCompact::InitializeInfoMap(uint8_t* p, size_t moving_space_sz) {
420   size_t nr_moving_pages = DivideByPageSize(moving_space_sz);
421 
422   chunk_info_vec_ = reinterpret_cast<uint32_t*>(p);
423   vector_length_ = moving_space_sz / kOffsetChunkSize;
424   size_t total = vector_length_ * sizeof(uint32_t);
425 
426   first_objs_moving_space_ = reinterpret_cast<ObjReference*>(p + total);
427   total += nr_moving_pages * sizeof(ObjReference);
428 
429   pre_compact_offset_moving_space_ = reinterpret_cast<uint32_t*>(p + total);
430   total += nr_moving_pages * sizeof(uint32_t);
431 
432   moving_pages_status_ = reinterpret_cast<Atomic<uint32_t>*>(p + total);
433   total += nr_moving_pages * sizeof(Atomic<uint32_t>);
434 
435   first_objs_non_moving_space_ = reinterpret_cast<ObjReference*>(p + total);
436   total += DivideByPageSize(heap_->GetNonMovingSpace()->Capacity()) * sizeof(ObjReference);
437   DCHECK_EQ(total, ComputeInfoMapSize());
438   return total;
439 }
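// The single info map initialized above is laid out, in order, as:
//   [ chunk_info_vec_                  : vector_length_  x uint32_t         ]
//   [ first_objs_moving_space_         : nr_moving_pages x ObjReference     ]
//   [ pre_compact_offset_moving_space_ : nr_moving_pages x uint32_t         ]
//   [ moving_pages_status_             : nr_moving_pages x Atomic<uint32_t> ]
//   [ first_objs_non_moving_space_     : one ObjReference per non-moving-space page ]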
440 
441 MarkCompact::MarkCompact(Heap* heap)
442     : GarbageCollector(heap, "concurrent mark compact"),
443       gc_barrier_(0),
444       lock_("mark compact lock", kGenericBottomLock),
445       bump_pointer_space_(heap->GetBumpPointerSpace()),
446       moving_space_bitmap_(bump_pointer_space_->GetMarkBitmap()),
447       moving_space_begin_(bump_pointer_space_->Begin()),
448       moving_space_end_(bump_pointer_space_->Limit()),
449       black_dense_end_(moving_space_begin_),
450       uffd_(kFdUnused),
451       sigbus_in_progress_count_{kSigbusCounterCompactionDoneMask, kSigbusCounterCompactionDoneMask},
452       compacting_(false),
453       marking_done_(false),
454       uffd_initialized_(false),
455       clamp_info_map_status_(ClampInfoStatus::kClampInfoNotDone) {
456   if (kIsDebugBuild) {
457     updated_roots_.reset(new std::unordered_set<void*>());
458   }
459   if (gUffdFeatures == 0) {
460     GetUffdAndMinorFault();
461   }
462   uint8_t* moving_space_begin = bump_pointer_space_->Begin();
463   // TODO: This depends on how the bump-pointer space move is implemented. If we
464   // switch between two virtual-memory mappings each time, then we will have to
465   // initialize live_words_bitmap_ accordingly.
466   live_words_bitmap_.reset(LiveWordsBitmap<kAlignment>::Create(
467       reinterpret_cast<uintptr_t>(moving_space_begin),
468       reinterpret_cast<uintptr_t>(bump_pointer_space_->Limit())));
469 
470   std::string err_msg;
471   size_t moving_space_size = bump_pointer_space_->Capacity();
472   {
473     // Create one MemMap for all the data structures
474     info_map_ = MemMap::MapAnonymous("Concurrent mark-compact chunk-info vector",
475                                      ComputeInfoMapSize(),
476                                      PROT_READ | PROT_WRITE,
477                                      /*low_4gb=*/false,
478                                      &err_msg);
479     if (UNLIKELY(!info_map_.IsValid())) {
480       LOG(FATAL) << "Failed to allocate concurrent mark-compact chunk-info vector: " << err_msg;
481     } else {
482       size_t total = InitializeInfoMap(info_map_.Begin(), moving_space_size);
483       DCHECK_EQ(total, info_map_.Size());
484     }
485   }
486 
487   size_t moving_space_alignment = Heap::BestPageTableAlignment(moving_space_size);
488   // The moving space is created at a fixed address, which is expected to be
489   // PMD-size aligned.
490   if (!IsAlignedParam(moving_space_begin, moving_space_alignment)) {
491     LOG(WARNING) << "Bump pointer space is not aligned to " << PrettySize(moving_space_alignment)
492                  << ". This can lead to longer stop-the-world pauses for compaction";
493   }
494   // NOTE: PROT_NONE is used here as these mappings are for address space reservation
495   // only and will be used only after appropriately remapping them.
496   from_space_map_ = MemMap::MapAnonymousAligned("Concurrent mark-compact from-space",
497                                                 moving_space_size,
498                                                 PROT_NONE,
499                                                 /*low_4gb=*/kObjPtrPoisoning,
500                                                 moving_space_alignment,
501                                                 &err_msg);
502   if (UNLIKELY(!from_space_map_.IsValid())) {
503     LOG(FATAL) << "Failed to allocate concurrent mark-compact from-space" << err_msg;
504   } else {
505     from_space_begin_ = from_space_map_.Begin();
506   }
507 
508   compaction_buffers_map_ = MemMap::MapAnonymous("Concurrent mark-compact compaction buffers",
509                                                  (1 + kMutatorCompactionBufferCount) * gPageSize,
510                                                  PROT_READ | PROT_WRITE,
511                                                  /*low_4gb=*/kObjPtrPoisoning,
512                                                  &err_msg);
513   if (UNLIKELY(!compaction_buffers_map_.IsValid())) {
514     LOG(FATAL) << "Failed to allocate concurrent mark-compact compaction buffers" << err_msg;
515   }
516   // We also use the first page-sized buffer for the purpose of terminating concurrent compaction.
517   conc_compaction_termination_page_ = compaction_buffers_map_.Begin();
518   // Touch the page deliberately to avoid userfaults on it. We madvise it in
519   // CompactionPhase() before using it to terminate concurrent compaction.
520   ForceRead(conc_compaction_termination_page_);
521 
522   // In most cases, we don't expect more than one LinearAlloc space.
523   linear_alloc_spaces_data_.reserve(1);
524 
525   // Initialize GC metrics.
526   metrics::ArtMetrics* metrics = GetMetrics();
527   // The mark-compact collector supports only full-heap collections at the moment.
528   gc_time_histogram_ = metrics->FullGcCollectionTime();
529   metrics_gc_count_ = metrics->FullGcCount();
530   metrics_gc_count_delta_ = metrics->FullGcCountDelta();
531   gc_throughput_histogram_ = metrics->FullGcThroughput();
532   gc_tracing_throughput_hist_ = metrics->FullGcTracingThroughput();
533   gc_throughput_avg_ = metrics->FullGcThroughputAvg();
534   gc_tracing_throughput_avg_ = metrics->FullGcTracingThroughputAvg();
535   gc_scanned_bytes_ = metrics->FullGcScannedBytes();
536   gc_scanned_bytes_delta_ = metrics->FullGcScannedBytesDelta();
537   gc_freed_bytes_ = metrics->FullGcFreedBytes();
538   gc_freed_bytes_delta_ = metrics->FullGcFreedBytesDelta();
539   gc_duration_ = metrics->FullGcDuration();
540   gc_duration_delta_ = metrics->FullGcDurationDelta();
541   are_metrics_initialized_ = true;
542 }
543 
544 void MarkCompact::AddLinearAllocSpaceData(uint8_t* begin, size_t len) {
545   DCHECK_ALIGNED_PARAM(begin, gPageSize);
546   DCHECK_ALIGNED_PARAM(len, gPageSize);
547   DCHECK_GE(len, Heap::GetPMDSize());
548   size_t alignment = Heap::BestPageTableAlignment(len);
549   std::string err_msg;
550   MemMap shadow(MemMap::MapAnonymousAligned("linear-alloc shadow map",
551                                             len,
552                                             PROT_NONE,
553                                             /*low_4gb=*/false,
554                                             alignment,
555                                             &err_msg));
556   if (!shadow.IsValid()) {
557     LOG(FATAL) << "Failed to allocate linear-alloc shadow map: " << err_msg;
558     UNREACHABLE();
559   }
560 
561   MemMap page_status_map(MemMap::MapAnonymous("linear-alloc page-status map",
562                                               DivideByPageSize(len),
563                                               PROT_READ | PROT_WRITE,
564                                               /*low_4gb=*/false,
565                                               &err_msg));
566   if (!page_status_map.IsValid()) {
567     LOG(FATAL) << "Failed to allocate linear-alloc page-status shadow map: " << err_msg;
568     UNREACHABLE();
569   }
570   linear_alloc_spaces_data_.emplace_back(
571       std::forward<MemMap>(shadow), std::forward<MemMap>(page_status_map), begin, begin + len);
572 }
573 
574 void MarkCompact::ClampGrowthLimit(size_t new_capacity) {
575   // From-space is the same size as the moving-space in virtual memory.
576   // However, if it is mapped above the 4GB boundary, then we don't need to clamp it
577   // synchronously.
578 #if defined(__LP64__)
579   constexpr bool kClampFromSpace = kObjPtrPoisoning;
580 #else
581   constexpr bool kClampFromSpace = true;
582 #endif
583   size_t old_capacity = bump_pointer_space_->Capacity();
584   new_capacity = bump_pointer_space_->ClampGrowthLimit(new_capacity);
585   if (new_capacity < old_capacity) {
586     CHECK(from_space_map_.IsValid());
587     if (kClampFromSpace) {
588       from_space_map_.SetSize(new_capacity);
589     }
590     clamp_info_map_status_ = ClampInfoStatus::kClampInfoPending;
591   }
592   CHECK_EQ(moving_space_begin_, bump_pointer_space_->Begin());
593 }
594 
595 void MarkCompact::MaybeClampGcStructures() {
596   size_t moving_space_size = bump_pointer_space_->Capacity();
597   DCHECK(thread_running_gc_ != nullptr);
598   if (UNLIKELY(clamp_info_map_status_ == ClampInfoStatus::kClampInfoPending)) {
599     CHECK(from_space_map_.IsValid());
600     if (from_space_map_.Size() > moving_space_size) {
601       from_space_map_.SetSize(moving_space_size);
602     }
603     // Bitmaps and other data structures
604     live_words_bitmap_->SetBitmapSize(moving_space_size);
605     size_t set_size = InitializeInfoMap(info_map_.Begin(), moving_space_size);
606     CHECK_LT(set_size, info_map_.Size());
607     info_map_.SetSize(set_size);
608 
609     clamp_info_map_status_ = ClampInfoStatus::kClampInfoFinished;
610   }
611 }
612 
613 void MarkCompact::PrepareCardTableForMarking(bool clear_alloc_space_cards) {
614   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
615   accounting::CardTable* const card_table = heap_->GetCardTable();
616   // immune_spaces_ is emptied in InitializePhase() before marking starts. This
617   // function is invoked twice during marking. We only need to populate immune_spaces_
618   // once per GC cycle. And when it's done (below), all the immune spaces are
619   // added to it. We can never have partially filled immune_spaces_.
620   bool update_immune_spaces = immune_spaces_.IsEmpty();
621   // Mark all of the spaces we never collect as immune.
622   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
623     if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect ||
624         space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
625       CHECK(space->IsZygoteSpace() || space->IsImageSpace());
626       if (update_immune_spaces) {
627         immune_spaces_.AddSpace(space);
628       }
629       accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
630       if (table != nullptr) {
631         table->ProcessCards();
632       } else {
633         // Keep cards aged if we don't have a mod-union table since we need
634         // to scan them in future GCs. This case is for app images.
635         card_table->ModifyCardsAtomic(
636             space->Begin(),
637             space->End(),
638             [](uint8_t card) {
639               return (card == gc::accounting::CardTable::kCardClean)
640                   ? card
641                   : gc::accounting::CardTable::kCardAged;
642             },
643             /* card modified visitor */ VoidFunctor());
644       }
645     } else if (clear_alloc_space_cards) {
646       CHECK(!space->IsZygoteSpace());
647       CHECK(!space->IsImageSpace());
648       // The card-table corresponding to bump-pointer and non-moving space can
649       // be cleared, because we are going to traverse all the reachable objects
650       // in these spaces. This card-table will eventually be used to track
651       // mutations while concurrent marking is going on.
652       card_table->ClearCardRange(space->Begin(), space->Limit());
653       if (space != bump_pointer_space_) {
654         CHECK_EQ(space, heap_->GetNonMovingSpace());
655         non_moving_space_ = space;
656         non_moving_space_bitmap_ = space->GetMarkBitmap();
657       }
658     } else {
659       card_table->ModifyCardsAtomic(
660           space->Begin(),
661           space->End(),
662           [](uint8_t card) {
663             return (card == gc::accounting::CardTable::kCardDirty) ?
664                        gc::accounting::CardTable::kCardAged :
665                        gc::accounting::CardTable::kCardClean;
666           },
667           /* card modified visitor */ VoidFunctor());
668     }
669   }
670 }
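// In short: image/zygote spaces either get ProcessCards() via their mod-union
// table or have their non-clean cards aged; when clear_alloc_space_cards is
// true, the moving and non-moving spaces have their cards cleared so that only
// subsequent mutations are tracked; otherwise their dirty cards are aged and
// all other cards are cleaned.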
671 
672 void MarkCompact::MarkZygoteLargeObjects() {
673   Thread* self = thread_running_gc_;
674   DCHECK_EQ(self, Thread::Current());
675   space::LargeObjectSpace* const los = heap_->GetLargeObjectsSpace();
676   if (los != nullptr) {
677     // Pick the current live bitmap (mark bitmap if swapped).
678     accounting::LargeObjectBitmap* const live_bitmap = los->GetLiveBitmap();
679     accounting::LargeObjectBitmap* const mark_bitmap = los->GetMarkBitmap();
680     // Walk through all of the objects and explicitly mark the zygote ones so they don't get swept.
681     std::pair<uint8_t*, uint8_t*> range = los->GetBeginEndAtomic();
682     live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(range.first),
683                                   reinterpret_cast<uintptr_t>(range.second),
684                                   [mark_bitmap, los, self](mirror::Object* obj)
685                                       REQUIRES(Locks::heap_bitmap_lock_)
686                                           REQUIRES_SHARED(Locks::mutator_lock_) {
687                                             if (los->IsZygoteLargeObject(self, obj)) {
688                                               mark_bitmap->Set(obj);
689                                             }
690                                           });
691   }
692 }
693 
694 void MarkCompact::InitializePhase() {
695   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
696   mark_stack_ = heap_->GetMarkStack();
697   CHECK(mark_stack_->IsEmpty());
698   immune_spaces_.Reset();
699   moving_first_objs_count_ = 0;
700   non_moving_first_objs_count_ = 0;
701   black_page_count_ = 0;
702   bytes_scanned_ = 0;
703   freed_objects_ = 0;
704   // The first buffer is used by gc-thread.
705   compaction_buffer_counter_.store(1, std::memory_order_relaxed);
706   black_allocations_begin_ = bump_pointer_space_->Limit();
707   DCHECK_EQ(moving_space_begin_, bump_pointer_space_->Begin());
708   from_space_slide_diff_ = from_space_begin_ - moving_space_begin_;
709   moving_space_end_ = bump_pointer_space_->Limit();
710   if (black_dense_end_ > moving_space_begin_) {
711     moving_space_bitmap_->Clear();
712   }
713   black_dense_end_ = moving_space_begin_;
714   // TODO: Would it suffice to read it once in the constructor, which is called
715   // in the zygote process?
716   pointer_size_ = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
717 }
718 
719 class MarkCompact::ThreadFlipVisitor : public Closure {
720  public:
721   explicit ThreadFlipVisitor(MarkCompact* collector) : collector_(collector) {}
722 
723   void Run(Thread* thread) override REQUIRES_SHARED(Locks::mutator_lock_) {
724     // Note: self is not necessarily equal to thread since thread may be suspended.
725     Thread* self = Thread::Current();
726     CHECK(thread == self || thread->GetState() != ThreadState::kRunnable)
727         << thread->GetState() << " thread " << thread << " self " << self;
728     thread->VisitRoots(collector_, kVisitRootFlagAllRoots);
729     // Interpreter cache is thread-local so it needs to be swept either in a
730     // flip, or a stop-the-world pause.
731     CHECK(collector_->compacting_);
732     thread->SweepInterpreterCache(collector_);
733     thread->AdjustTlab(collector_->black_objs_slide_diff_);
734   }
735 
736  private:
737   MarkCompact* const collector_;
738 };
739 
740 class MarkCompact::FlipCallback : public Closure {
741  public:
742   explicit FlipCallback(MarkCompact* collector) : collector_(collector) {}
743 
744   void Run([[maybe_unused]] Thread* thread) override REQUIRES(Locks::mutator_lock_) {
745     collector_->CompactionPause();
746   }
747 
748  private:
749   MarkCompact* const collector_;
750 };
751 
752 void MarkCompact::RunPhases() {
753   Thread* self = Thread::Current();
754   thread_running_gc_ = self;
755   Runtime* runtime = Runtime::Current();
756   InitializePhase();
757   GetHeap()->PreGcVerification(this);
758   {
759     ReaderMutexLock mu(self, *Locks::mutator_lock_);
760     MarkingPhase();
761   }
762   {
763     // Marking pause
764     ScopedPause pause(this);
765     MarkingPause();
766     if (kIsDebugBuild) {
767       bump_pointer_space_->AssertAllThreadLocalBuffersAreRevoked();
768     }
769   }
770   bool perform_compaction;
771   {
772     ReaderMutexLock mu(self, *Locks::mutator_lock_);
773     ReclaimPhase();
774     perform_compaction = PrepareForCompaction();
775   }
776 
777   if (perform_compaction) {
778     // Compaction pause
779     ThreadFlipVisitor visitor(this);
780     FlipCallback callback(this);
781     runtime->GetThreadList()->FlipThreadRoots(
782         &visitor, &callback, this, GetHeap()->GetGcPauseListener());
783 
784     if (IsValidFd(uffd_)) {
785       ReaderMutexLock mu(self, *Locks::mutator_lock_);
786       CompactionPhase();
787     }
788   }
789   FinishPhase();
790   GetHeap()->PostGcVerification(this);
791   thread_running_gc_ = nullptr;
792 }
793 
794 void MarkCompact::InitMovingSpaceFirstObjects(size_t vec_len, size_t to_space_page_idx) {
795   uint32_t offset_in_chunk_word;
796   uint32_t offset;
797   mirror::Object* obj;
798   const uintptr_t heap_begin = moving_space_bitmap_->HeapBegin();
799 
800   // Find the first live word.
801   size_t chunk_idx = to_space_page_idx * (gPageSize / kOffsetChunkSize);
802   DCHECK_LT(chunk_idx, vec_len);
803   // Find the first live word in the space
804   for (; chunk_info_vec_[chunk_idx] == 0; chunk_idx++) {
805     if (chunk_idx >= vec_len) {
806       // We don't have any live data on the moving-space.
807       moving_first_objs_count_ = to_space_page_idx;
808       return;
809     }
810   }
811   DCHECK_LT(chunk_idx, vec_len);
812   // Use live-words bitmap to find the first live word
813   offset_in_chunk_word = live_words_bitmap_->FindNthLiveWordOffset(chunk_idx, /*n*/ 0);
814   offset = chunk_idx * kBitsPerVectorWord + offset_in_chunk_word;
815   DCHECK(live_words_bitmap_->Test(offset)) << "offset=" << offset
816                                            << " chunk_idx=" << chunk_idx
817                                            << " N=0"
818                                            << " offset_in_word=" << offset_in_chunk_word
819                                            << " word=" << std::hex
820                                            << live_words_bitmap_->GetWord(chunk_idx);
821   obj = moving_space_bitmap_->FindPrecedingObject(heap_begin + offset * kAlignment);
822   // TODO: add a check to validate the object.
823 
824   pre_compact_offset_moving_space_[to_space_page_idx] = offset;
825   first_objs_moving_space_[to_space_page_idx].Assign(obj);
826   to_space_page_idx++;
827 
828   uint32_t page_live_bytes = 0;
829   while (true) {
830     for (; page_live_bytes <= gPageSize; chunk_idx++) {
831       if (chunk_idx >= vec_len) {
832         moving_first_objs_count_ = to_space_page_idx;
833         return;
834       }
835       page_live_bytes += chunk_info_vec_[chunk_idx];
836     }
837     chunk_idx--;
838     page_live_bytes -= gPageSize;
839     DCHECK_LE(page_live_bytes, kOffsetChunkSize);
840     DCHECK_LE(page_live_bytes, chunk_info_vec_[chunk_idx])
841         << " chunk_idx=" << chunk_idx
842         << " to_space_page_idx=" << to_space_page_idx
843         << " vec_len=" << vec_len;
844     DCHECK(IsAligned<kAlignment>(chunk_info_vec_[chunk_idx] - page_live_bytes));
845     offset_in_chunk_word =
846             live_words_bitmap_->FindNthLiveWordOffset(
847                 chunk_idx, (chunk_info_vec_[chunk_idx] - page_live_bytes) / kAlignment);
848     offset = chunk_idx * kBitsPerVectorWord + offset_in_chunk_word;
849     DCHECK(live_words_bitmap_->Test(offset))
850         << "offset=" << offset
851         << " chunk_idx=" << chunk_idx
852         << " N=" << ((chunk_info_vec_[chunk_idx] - page_live_bytes) / kAlignment)
853         << " offset_in_word=" << offset_in_chunk_word
854         << " word=" << std::hex << live_words_bitmap_->GetWord(chunk_idx);
855     // TODO: Can we optimize this for large objects? If we are continuing a
856     // large object that spans multiple pages, then we may be able to do without
857     // calling FindPrecedingObject().
858     //
859     // Find the object which encapsulates offset in it, which could be
860     // starting at offset itself.
861     obj = moving_space_bitmap_->FindPrecedingObject(heap_begin + offset * kAlignment);
862     // TODO: add a check to validate the object.
863     pre_compact_offset_moving_space_[to_space_page_idx] = offset;
864     first_objs_moving_space_[to_space_page_idx].Assign(obj);
865     to_space_page_idx++;
866     chunk_idx++;
867   }
868 }
869 
870 size_t MarkCompact::InitNonMovingFirstObjects(uintptr_t begin,
871                                               uintptr_t end,
872                                               accounting::ContinuousSpaceBitmap* bitmap,
873                                               ObjReference* first_objs_arr) {
874   mirror::Object* prev_obj;
875   size_t page_idx;
876   {
877     // Find first live object
878     mirror::Object* obj = nullptr;
879     bitmap->VisitMarkedRange</*kVisitOnce*/ true>(begin,
880                                                   end,
881                                                   [&obj] (mirror::Object* o) {
882                                                     obj = o;
883                                                   });
884     if (obj == nullptr) {
885       // There are no live objects in the space
886       return 0;
887     }
888     page_idx = DivideByPageSize(reinterpret_cast<uintptr_t>(obj) - begin);
889     first_objs_arr[page_idx++].Assign(obj);
890     prev_obj = obj;
891   }
892   // TODO: check obj is valid
893   uintptr_t prev_obj_end = reinterpret_cast<uintptr_t>(prev_obj)
894                            + RoundUp(prev_obj->SizeOf<kDefaultVerifyFlags>(), kAlignment);
895   // For every page find the object starting from which we need to call
896   // VisitReferences. It could either be an object that started on some
897   // preceding page, or some object starting within this page.
898   begin = RoundDown(reinterpret_cast<uintptr_t>(prev_obj) + gPageSize, gPageSize);
899   while (begin < end) {
900     // Utilize, if any, large object that started in some preceding page, but
901     // overlaps with this page as well.
902     if (prev_obj != nullptr && prev_obj_end > begin) {
903       DCHECK_LT(prev_obj, reinterpret_cast<mirror::Object*>(begin));
904       first_objs_arr[page_idx].Assign(prev_obj);
905     } else {
906       prev_obj_end = 0;
907       // It's sufficient to search for the previous object only in the preceding page.
908       // If no live object started in that page and some object had started in
909       // the page preceding that one, big enough to overlap with
910       // the current page, then we wouldn't be in this else branch.
911       prev_obj = bitmap->FindPrecedingObject(begin, begin - gPageSize);
912       if (prev_obj != nullptr) {
913         prev_obj_end = reinterpret_cast<uintptr_t>(prev_obj)
914                         + RoundUp(prev_obj->SizeOf<kDefaultVerifyFlags>(), kAlignment);
915       }
916       if (prev_obj_end > begin) {
917         first_objs_arr[page_idx].Assign(prev_obj);
918       } else {
919         // Find the first live object in this page
920         bitmap->VisitMarkedRange</*kVisitOnce*/ true>(
921             begin, begin + gPageSize, [first_objs_arr, page_idx](mirror::Object* obj) {
922               first_objs_arr[page_idx].Assign(obj);
923             });
924       }
925       // An empty entry indicates that the page has no live objects and hence
926       // can be skipped.
927     }
928     begin += gPageSize;
929     page_idx++;
930   }
931   return page_idx;
932 }
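// Example: if a large object starts in page 0 and extends into pages 1 and 2,
// then first_objs_arr[1] and first_objs_arr[2] both point at that object, while
// first_objs_arr[0] holds whichever live object starts first in page 0. A page
// with no live object and no such carry-over keeps a null entry and is skipped
// later.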
933 
934 bool MarkCompact::PrepareForCompaction() {
935   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
936   size_t chunk_info_per_page = gPageSize / kOffsetChunkSize;
937   size_t vector_len = (black_allocations_begin_ - moving_space_begin_) / kOffsetChunkSize;
938   DCHECK_LE(vector_len, vector_length_);
939   DCHECK_ALIGNED_PARAM(vector_length_, chunk_info_per_page);
940   if (UNLIKELY(vector_len == 0)) {
941     // Nothing to compact.
942     return false;
943   }
944   for (size_t i = 0; i < vector_len; i++) {
945     DCHECK_LE(chunk_info_vec_[i], kOffsetChunkSize);
946     DCHECK_EQ(chunk_info_vec_[i], live_words_bitmap_->LiveBytesInBitmapWord(i));
947   }
948 
949   // TODO: We can do a lot of neat tricks with this offset vector to tune the
950   // compaction as we wish. Originally, the compaction algorithm slides all
951   // live objects towards the beginning of the heap. This is nice because it
952   // keeps the spatial locality of objects intact.
953   // However, sometimes it's desired to compact objects in certain portions
954   // of the heap. For instance, it is expected that, over time,
955   // objects towards the beginning of the heap are long lived and are always
956   // densely packed. In this case, it makes sense to only update references in
957   // there and not try to compact it.
958   // Furthermore, we might have some large objects and may not want to move such
959   // objects.
960   // We can adjust, without too much effort, the values in the chunk_info_vec_ such
961   // that the objects in the dense beginning area aren't moved. OTOH, large
962   // objects, which could be anywhere in the heap, could also be kept from
963   // moving by using a similar trick. The only issue is that by doing this we will
964   // leave an unused hole in the middle of the heap which can't be used for
965   // allocations until we do a *full* compaction.
966   //
967   // At this point every element in the chunk_info_vec_ contains the live-bytes
968   // of the corresponding chunk. For old-to-new address computation we need
969   // every element to reflect total live-bytes till the corresponding chunk.
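  // For instance, if the first four chunks have live-bytes {40, 0, 96, 64} and
  // black_dense_idx is 0, the std::exclusive_scan() below rewrites them to the
  // running totals {0, 40, 40, 136}; entry i then gives the post-compact byte
  // offset (from moving_space_begin_) at which chunk i's surviving data starts.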
970 
971   size_t black_dense_idx = 0;
972   GcCause gc_cause = GetCurrentIteration()->GetGcCause();
973   if (gc_cause != kGcCauseExplicit && gc_cause != kGcCauseCollectorTransition &&
974       !GetCurrentIteration()->GetClearSoftReferences()) {
975     uint64_t live_bytes = 0, total_bytes = 0;
976     size_t aligned_vec_len = RoundUp(vector_len, chunk_info_per_page);
977     size_t num_pages = aligned_vec_len / chunk_info_per_page;
978     size_t threshold_passing_marker = 0;  // In number of pages
979     std::vector<uint32_t> pages_live_bytes;
980     pages_live_bytes.reserve(num_pages);
981     // Identify the largest chunk towards the beginning of moving space which
982     // passes the black-dense threshold.
983     for (size_t i = 0; i < aligned_vec_len; i += chunk_info_per_page) {
984       uint32_t page_live_bytes = 0;
985       for (size_t j = 0; j < chunk_info_per_page; j++) {
986         page_live_bytes += chunk_info_vec_[i + j];
987         total_bytes += kOffsetChunkSize;
988       }
989       live_bytes += page_live_bytes;
990       pages_live_bytes.push_back(page_live_bytes);
991       if (live_bytes * 100U >= total_bytes * kBlackDenseRegionThreshold) {
992         threshold_passing_marker = pages_live_bytes.size();
993       }
994     }
995     DCHECK_EQ(pages_live_bytes.size(), num_pages);
996     // Eliminate the pages at the end of the chunk which are lower than the threshold.
997     if (threshold_passing_marker > 0) {
998       auto iter = std::find_if(
999           pages_live_bytes.rbegin() + (num_pages - threshold_passing_marker),
1000           pages_live_bytes.rend(),
1001           [](uint32_t bytes) { return bytes * 100U >= gPageSize * kBlackDenseRegionThreshold; });
1002       black_dense_idx = (pages_live_bytes.rend() - iter) * chunk_info_per_page;
1003     }
1004     black_dense_end_ = moving_space_begin_ + black_dense_idx * kOffsetChunkSize;
1005     DCHECK_ALIGNED_PARAM(black_dense_end_, gPageSize);
1006 
1007     // Adjust for a class allocated after black_dense_end_ while its object(s)
1008     // are earlier. This is required because we update the references in the
1009     // black-dense region in place: if the class of some page's first
1010     // object, which started in some preceding page, has already been
1011     // updated, then we would read wrong class data, such as the ref-offset bitmap.
1012     for (auto iter = class_after_obj_map_.rbegin();
1013          iter != class_after_obj_map_.rend() &&
1014          reinterpret_cast<uint8_t*>(iter->first.AsMirrorPtr()) >= black_dense_end_;
1015          iter++) {
1016       black_dense_end_ =
1017           std::min(black_dense_end_, reinterpret_cast<uint8_t*>(iter->second.AsMirrorPtr()));
1018       black_dense_end_ = AlignDown(black_dense_end_, gPageSize);
1019     }
1020     black_dense_idx = (black_dense_end_ - moving_space_begin_) / kOffsetChunkSize;
1021     DCHECK_LE(black_dense_idx, vector_len);
1022     if (black_dense_idx == vector_len) {
1023       // There is nothing to compact.
1024       return false;
1025     }
1026     InitNonMovingFirstObjects(reinterpret_cast<uintptr_t>(moving_space_begin_),
1027                               reinterpret_cast<uintptr_t>(black_dense_end_),
1028                               moving_space_bitmap_,
1029                               first_objs_moving_space_);
1030   }
1031 
1032   InitMovingSpaceFirstObjects(vector_len, black_dense_idx / chunk_info_per_page);
1033   non_moving_first_objs_count_ =
1034       InitNonMovingFirstObjects(reinterpret_cast<uintptr_t>(non_moving_space_->Begin()),
1035                                 reinterpret_cast<uintptr_t>(non_moving_space_->End()),
1036                                 non_moving_space_->GetLiveBitmap(),
1037                                 first_objs_non_moving_space_);
1038   // Update the vector one entry past the heap usage, as it is required for
1039   // computing the post-compact addresses of black-allocated objects.
1040   uint32_t total_bytes;
1041   if (vector_len < vector_length_) {
1042     vector_len++;
1043     total_bytes = 0;
1044   } else {
1045     // Fetch the value stored in the last element before it gets overwritten by
1046     // std::exclusive_scan().
1047     total_bytes = chunk_info_vec_[vector_len - 1];
1048   }
1049   std::exclusive_scan(chunk_info_vec_ + black_dense_idx,
1050                       chunk_info_vec_ + vector_len,
1051                       chunk_info_vec_ + black_dense_idx,
1052                       black_dense_idx * kOffsetChunkSize);
1053   total_bytes += chunk_info_vec_[vector_len - 1];
1054   post_compact_end_ = AlignUp(moving_space_begin_ + total_bytes, gPageSize);
1055   CHECK_EQ(post_compact_end_, moving_space_begin_ + moving_first_objs_count_ * gPageSize)
1056       << "moving_first_objs_count_:" << moving_first_objs_count_
1057       << " black_dense_idx:" << black_dense_idx << " vector_len:" << vector_len
1058       << " total_bytes:" << total_bytes
1059       << " black_dense_end:" << reinterpret_cast<void*>(black_dense_end_)
1060       << " chunk_info_per_page:" << chunk_info_per_page;
1061   black_objs_slide_diff_ = black_allocations_begin_ - post_compact_end_;
1062   // We shouldn't be consuming more space after compaction than pre-compaction.
1063   CHECK_GE(black_objs_slide_diff_, 0);
1064   if (black_objs_slide_diff_ == 0) {
1065     black_dense_end_ = black_allocations_begin_;
1066     return false;
1067   }
1068   for (size_t i = vector_len; i < vector_length_; i++) {
1069     DCHECK_EQ(chunk_info_vec_[i], 0u);
1070   }
1071 
1072   // How do we handle compaction of heap portion used for allocations after the
1073   // marking-pause?
1074   // All allocations after the marking-pause are considered black (reachable)
1075   // for this GC cycle. However, they need not be allocated contiguously as
1076   // different mutators use TLABs. So we will compact the heap till the point
1077   // where allocations took place before the marking-pause. And everything after
1078   // that will be slid with TLAB holes, and then TLAB info in TLS will be
1079   // appropriately updated in the pre-compaction pause.
1080   // The chunk-info vector entries for the post marking-pause allocations will
1081   // also be updated in the pre-compaction pause.
1082 
1083   if (!uffd_initialized_) {
1084     CreateUserfaultfd(/*post_fork=*/false);
1085   }
1086   return true;
1087 }
1088 
1089 class MarkCompact::VerifyRootMarkedVisitor : public SingleRootVisitor {
1090  public:
1091   explicit VerifyRootMarkedVisitor(MarkCompact* collector) : collector_(collector) { }
1092 
1093   void VisitRoot(mirror::Object* root, const RootInfo& info) override
1094       REQUIRES_SHARED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
1095     CHECK(collector_->IsMarked(root) != nullptr) << info.ToString();
1096   }
1097 
1098  private:
1099   MarkCompact* const collector_;
1100 };
1101 
1102 void MarkCompact::ReMarkRoots(Runtime* runtime) {
1103   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
1104   DCHECK_EQ(thread_running_gc_, Thread::Current());
1105   Locks::mutator_lock_->AssertExclusiveHeld(thread_running_gc_);
1106   MarkNonThreadRoots(runtime);
1107   MarkConcurrentRoots(static_cast<VisitRootFlags>(kVisitRootFlagNewRoots
1108                                                   | kVisitRootFlagStopLoggingNewRoots
1109                                                   | kVisitRootFlagClearRootLog),
1110                       runtime);
1111 
1112   if (kVerifyRootsMarked) {
1113     TimingLogger::ScopedTiming t2("(Paused)VerifyRoots", GetTimings());
1114     VerifyRootMarkedVisitor visitor(this);
1115     runtime->VisitRoots(&visitor);
1116   }
1117 }
1118 
1119 void MarkCompact::MarkingPause() {
1120   TimingLogger::ScopedTiming t("(Paused)MarkingPause", GetTimings());
1121   Runtime* runtime = Runtime::Current();
1122   Locks::mutator_lock_->AssertExclusiveHeld(thread_running_gc_);
1123   {
1124     // Handle the dirty objects, since this is a concurrent GC.
1125     WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_);
1126     {
1127       MutexLock mu2(thread_running_gc_, *Locks::runtime_shutdown_lock_);
1128       MutexLock mu3(thread_running_gc_, *Locks::thread_list_lock_);
1129       std::list<Thread*> thread_list = runtime->GetThreadList()->GetList();
1130       for (Thread* thread : thread_list) {
1131         thread->VisitRoots(this, static_cast<VisitRootFlags>(0));
1132         DCHECK_EQ(thread->GetThreadLocalGcBuffer(), nullptr);
1133         // Need to revoke all the thread-local allocation stacks since we will
1134         // swap the allocation stacks (below) and don't want anybody to allocate
1135         // into the live stack.
1136         thread->RevokeThreadLocalAllocationStack();
1137         bump_pointer_space_->RevokeThreadLocalBuffers(thread);
1138       }
1139     }
1140     // Fetch only the accumulated objects-allocated count as it is guaranteed to
1141     // be up-to-date after the TLAB revocation above.
1142     freed_objects_ += bump_pointer_space_->GetAccumulatedObjectsAllocated();
1143     // Capture 'end' of moving-space at this point. Every allocation beyond this
1144     // point will be considered as black.
1145     // Align-up to page boundary so that black allocations happen from next page
1146     // onwards. Also, it ensures that 'end' is aligned for card-table's
1147     // ClearCardRange().
1148     black_allocations_begin_ = bump_pointer_space_->AlignEnd(thread_running_gc_, gPageSize, heap_);
1149     DCHECK_ALIGNED_PARAM(black_allocations_begin_, gPageSize);
1150 
1151     // Re-mark root set. Doesn't include thread-roots as they are already marked
1152     // above.
1153     ReMarkRoots(runtime);
1154     // Scan dirty objects.
1155     RecursiveMarkDirtyObjects(/*paused*/ true, accounting::CardTable::kCardDirty);
1156     {
1157       TimingLogger::ScopedTiming t2("SwapStacks", GetTimings());
1158       heap_->SwapStacks();
1159       live_stack_freeze_size_ = heap_->GetLiveStack()->Size();
1160     }
1161   }
1162   // TODO: For PreSweepingGcVerification(), find correct strategy to visit/walk
1163   // objects in bump-pointer space when we have a mark-bitmap to indicate live
1164   // objects. At the same time we also need to be able to visit black allocations,
1165   // even though they are not marked in the bitmap. Without both of these we fail
1166   // pre-sweeping verification. Additionally, we leave windows open wherein a
1167   // VisitObjects/Walk on the space would either miss some objects or visit
1168   // unreachable ones. These windows occur when we are switching from the shared
1169   // mutator-lock to exclusive and vice-versa, starting from here until the compaction pause.
1170   // heap_->PreSweepingGcVerification(this);
1171 
1172   // Disallow new system weaks to prevent a race which occurs when someone adds
1173   // a new system weak before we sweep them. Since this new system weak may not
1174   // be marked, the GC may incorrectly sweep it. This also fixes a race where
1175   // interning may attempt to return a strong reference to a string that is
1176   // about to be swept.
1177   runtime->DisallowNewSystemWeaks();
1178   // Enable the reference processing slow path, needs to be done with mutators
1179   // paused since there is no lock in the GetReferent fast path.
1180   heap_->GetReferenceProcessor()->EnableSlowPath();
1181   marking_done_ = true;
1182 }
1183 
1184 void MarkCompact::SweepSystemWeaks(Thread* self, Runtime* runtime, const bool paused) {
1185   TimingLogger::ScopedTiming t(paused ? "(Paused)SweepSystemWeaks" : "SweepSystemWeaks",
1186                                GetTimings());
1187   ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
1188   runtime->SweepSystemWeaks(this);
1189 }
1190 
1191 void MarkCompact::ProcessReferences(Thread* self) {
1192   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
1193   GetHeap()->GetReferenceProcessor()->ProcessReferences(self, GetTimings());
1194 }
1195 
1196 void MarkCompact::Sweep(bool swap_bitmaps) {
1197   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
1198   // Ensure that nobody inserted objects in the live stack after we swapped the
1199   // stacks.
1200   CHECK_GE(live_stack_freeze_size_, GetHeap()->GetLiveStack()->Size());
1201   {
1202     TimingLogger::ScopedTiming t2("MarkAllocStackAsLive", GetTimings());
1203     // Mark everything allocated since the last GC as live so that we can sweep
1204     // concurrently, knowing that new allocations won't be marked as live.
1205     accounting::ObjectStack* live_stack = heap_->GetLiveStack();
1206     heap_->MarkAllocStackAsLive(live_stack);
1207     live_stack->Reset();
1208     DCHECK(mark_stack_->IsEmpty());
1209   }
1210   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
1211     if (space->IsContinuousMemMapAllocSpace() && space != bump_pointer_space_ &&
1212         !immune_spaces_.ContainsSpace(space)) {
1213       space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
1214       DCHECK(!alloc_space->IsZygoteSpace());
1215       TimingLogger::ScopedTiming split("SweepMallocSpace", GetTimings());
1216       RecordFree(alloc_space->Sweep(swap_bitmaps));
1217     }
1218   }
1219   SweepLargeObjects(swap_bitmaps);
1220 }
1221 
1222 void MarkCompact::SweepLargeObjects(bool swap_bitmaps) {
1223   space::LargeObjectSpace* los = heap_->GetLargeObjectsSpace();
1224   if (los != nullptr) {
1225     TimingLogger::ScopedTiming split(__FUNCTION__, GetTimings());
1226     RecordFreeLOS(los->Sweep(swap_bitmaps));
1227   }
1228 }
1229 
1230 void MarkCompact::ReclaimPhase() {
1231   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
1232   DCHECK(thread_running_gc_ == Thread::Current());
1233   Runtime* const runtime = Runtime::Current();
1234   // Process the references concurrently.
1235   ProcessReferences(thread_running_gc_);
1236   // TODO: Try to merge this system-weak sweeping with the one while updating
1237   // references during the compaction pause.
1238   SweepSystemWeaks(thread_running_gc_, runtime, /*paused*/ false);
1239   runtime->AllowNewSystemWeaks();
1240   // Clean up class loaders after system weaks are swept since that is how we know if class
1241   // unloading occurred.
1242   runtime->GetClassLinker()->CleanupClassLoaders();
1243   {
1244     WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_);
1245     // Reclaim unmarked objects.
1246     Sweep(false);
1247     // Swap the live and mark bitmaps for each space that we modified. This is an
1248     // optimization that enables us to not clear live bits inside of the sweep. Only swaps unbound
1249     // bitmaps.
1250     SwapBitmaps();
1251     // Unbind the live and mark bitmaps.
1252     GetHeap()->UnBindBitmaps();
1253   }
1254 }
1255 
1256 // We want to avoid checking, for every reference, whether it lies within the
1257 // page. This can be done if we know where in the page the holder object lies.
1258 // If it doesn't overlap either boundary, then we can skip the checks.
1259 template <bool kCheckBegin, bool kCheckEnd>
1260 class MarkCompact::RefsUpdateVisitor {
1261  public:
1262   explicit RefsUpdateVisitor(MarkCompact* collector,
1263                              mirror::Object* obj,
1264                              uint8_t* begin,
1265                              uint8_t* end)
1266       : collector_(collector),
1267         moving_space_begin_(collector->black_dense_end_),
1268         moving_space_end_(collector->moving_space_end_),
1269         obj_(obj),
1270         begin_(begin),
1271         end_(end) {
1272     DCHECK(!kCheckBegin || begin != nullptr);
1273     DCHECK(!kCheckEnd || end != nullptr);
1274   }
1275 
1276   void operator()([[maybe_unused]] mirror::Object* old,
1277                   MemberOffset offset,
1278                   [[maybe_unused]] bool is_static) const ALWAYS_INLINE
1279       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES_SHARED(Locks::heap_bitmap_lock_) {
1280     bool update = true;
1281     if (kCheckBegin || kCheckEnd) {
1282       uint8_t* ref = reinterpret_cast<uint8_t*>(obj_) + offset.Int32Value();
1283       update = (!kCheckBegin || ref >= begin_) && (!kCheckEnd || ref < end_);
1284     }
1285     if (update) {
1286       collector_->UpdateRef(obj_, offset, moving_space_begin_, moving_space_end_);
1287     }
1288   }
1289 
1290   // For object arrays we don't need to check boundaries here as it's done in
1291   // VisitReferences().
1292   // TODO: Optimize reference updating using SIMD instructions. Object arrays
1293   // are perfect as all references are tightly packed.
1294   void operator()([[maybe_unused]] mirror::Object* old,
1295                   MemberOffset offset,
1296                   [[maybe_unused]] bool is_static,
1297                   [[maybe_unused]] bool is_obj_array) const ALWAYS_INLINE
1298       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES_SHARED(Locks::heap_bitmap_lock_) {
1299     collector_->UpdateRef(obj_, offset, moving_space_begin_, moving_space_end_);
1300   }
1301 
1302   void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
1303       ALWAYS_INLINE
1304       REQUIRES_SHARED(Locks::mutator_lock_) {
1305     if (!root->IsNull()) {
1306       VisitRoot(root);
1307     }
1308   }
1309 
1310   void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
1311       ALWAYS_INLINE
1312       REQUIRES_SHARED(Locks::mutator_lock_) {
1313     collector_->UpdateRoot(root, moving_space_begin_, moving_space_end_);
1314   }
1315 
1316  private:
1317   MarkCompact* const collector_;
1318   uint8_t* const moving_space_begin_;
1319   uint8_t* const moving_space_end_;
1320   mirror::Object* const obj_;
1321   uint8_t* const begin_;
1322   uint8_t* const end_;
1323 };
1324 
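// [Editor's illustrative sketch, not part of the original source.] A stripped-down,
// self-contained analog of the kCheckBegin/kCheckEnd idea above: because the flags are
// template parameters, the boundary comparisons compile away entirely for objects known
// to lie fully inside the page being compacted. All names below are hypothetical and the
// snippet only assumes <cstdint>.
namespace refs_update_example {
template <bool kCheckBegin, bool kCheckEnd>
struct BoundedUpdater {
  uint8_t* begin;  // page begin; only consulted when kCheckBegin is true
  uint8_t* end;    // page end; only consulted when kCheckEnd is true
  // Returns true if a reference field located at 'ref_addr' falls inside the page and
  // therefore needs to be updated by this visitor instance.
  bool ShouldUpdate(uint8_t* ref_addr) const {
    return (!kCheckBegin || ref_addr >= begin) && (!kCheckEnd || ref_addr < end);
  }
};
}  // namespace refs_update_example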
1325 bool MarkCompact::IsValidObject(mirror::Object* obj) const {
1326   mirror::Class* klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
1327   if (!heap_->GetVerification()->IsValidHeapObjectAddress(klass)) {
1328     return false;
1329   }
1330   return heap_->GetVerification()->IsValidClassUnchecked<kWithFromSpaceBarrier>(
1331           obj->GetClass<kVerifyNone, kWithFromSpaceBarrier>());
1332 }
1333 
1334 template <typename Callback>
1335 void MarkCompact::VerifyObject(mirror::Object* ref, Callback& callback) const {
1336   if (kIsDebugBuild) {
1337     mirror::Class* klass = ref->GetClass<kVerifyNone, kWithFromSpaceBarrier>();
1338     mirror::Class* pre_compact_klass = ref->GetClass<kVerifyNone, kWithoutReadBarrier>();
1339     mirror::Class* klass_klass = klass->GetClass<kVerifyNone, kWithFromSpaceBarrier>();
1340     mirror::Class* klass_klass_klass = klass_klass->GetClass<kVerifyNone, kWithFromSpaceBarrier>();
1341     if (HasAddress(pre_compact_klass) &&
1342         reinterpret_cast<uint8_t*>(pre_compact_klass) < black_allocations_begin_) {
1343       CHECK(moving_space_bitmap_->Test(pre_compact_klass))
1344           << "ref=" << ref
1345           << " post_compact_end=" << static_cast<void*>(post_compact_end_)
1346           << " pre_compact_klass=" << pre_compact_klass
1347           << " black_allocations_begin=" << static_cast<void*>(black_allocations_begin_);
1348       CHECK(live_words_bitmap_->Test(pre_compact_klass));
1349     }
1350     if (!IsValidObject(ref)) {
1351       std::ostringstream oss;
1352       oss << "Invalid object: "
1353           << "ref=" << ref
1354           << " klass=" << klass
1355           << " klass_klass=" << klass_klass
1356           << " klass_klass_klass=" << klass_klass_klass
1357           << " pre_compact_klass=" << pre_compact_klass
1358           << " from_space_begin=" << static_cast<void*>(from_space_begin_)
1359           << " pre_compact_begin=" << static_cast<void*>(bump_pointer_space_->Begin())
1360           << " post_compact_end=" << static_cast<void*>(post_compact_end_)
1361           << " black_allocations_begin=" << static_cast<void*>(black_allocations_begin_);
1362 
1363       // Call callback before dumping larger data like RAM and space dumps.
1364       callback(oss);
1365 
1366       oss << " \nobject="
1367           << heap_->GetVerification()->DumpRAMAroundAddress(reinterpret_cast<uintptr_t>(ref), 128)
1368           << " \nklass(from)="
1369           << heap_->GetVerification()->DumpRAMAroundAddress(reinterpret_cast<uintptr_t>(klass), 128)
1370           << "spaces:\n";
1371       heap_->DumpSpaces(oss);
1372       LOG(FATAL) << oss.str();
1373     }
1374   }
1375 }
1376 
1377 void MarkCompact::CompactPage(mirror::Object* obj,
1378                               uint32_t offset,
1379                               uint8_t* addr,
1380                               bool needs_memset_zero) {
1381   DCHECK(moving_space_bitmap_->Test(obj)
1382          && live_words_bitmap_->Test(obj));
1383   DCHECK(live_words_bitmap_->Test(offset)) << "obj=" << obj
1384                                            << " offset=" << offset
1385                                            << " addr=" << static_cast<void*>(addr)
1386                                            << " black_allocs_begin="
1387                                            << static_cast<void*>(black_allocations_begin_)
1388                                            << " post_compact_addr="
1389                                            << static_cast<void*>(post_compact_end_);
1390   uint8_t* const start_addr = addr;
1391   // Number of distinct live-strides encountered on this page.
1392   size_t stride_count = 0;
1393   uint8_t* last_stride = addr;
1394   uint32_t last_stride_begin = 0;
1395   auto verify_obj_callback = [&] (std::ostream& os) {
1396                                os << " stride_count=" << stride_count
1397                                   << " last_stride=" << static_cast<void*>(last_stride)
1398                                   << " offset=" << offset
1399                                   << " start_addr=" << static_cast<void*>(start_addr);
1400                              };
1401   obj = GetFromSpaceAddr(obj);
1402   live_words_bitmap_->VisitLiveStrides(
1403       offset,
1404       black_allocations_begin_,
1405       gPageSize,
1406       [&addr, &last_stride, &stride_count, &last_stride_begin, verify_obj_callback, this](
1407           uint32_t stride_begin, size_t stride_size, [[maybe_unused]] bool is_last)
1408           REQUIRES_SHARED(Locks::mutator_lock_) {
1409             const size_t stride_in_bytes = stride_size * kAlignment;
1410             DCHECK_LE(stride_in_bytes, gPageSize);
1411             last_stride_begin = stride_begin;
1412             DCHECK(IsAligned<kAlignment>(addr));
1413             memcpy(addr, from_space_begin_ + stride_begin * kAlignment, stride_in_bytes);
1414             if (kIsDebugBuild) {
1415               uint8_t* space_begin = bump_pointer_space_->Begin();
1416               // We can interpret the first word of the stride as an
1417               // obj only from second stride onwards, as the first
1418               // stride's first-object may have started on previous
1419               // page. The only exception is the first page of the
1420               // moving space.
1421               if (stride_count > 0 || stride_begin * kAlignment < gPageSize) {
1422                 mirror::Object* o =
1423                     reinterpret_cast<mirror::Object*>(space_begin + stride_begin * kAlignment);
1424                 CHECK(live_words_bitmap_->Test(o)) << "ref=" << o;
1425                 CHECK(moving_space_bitmap_->Test(o))
1426                     << "ref=" << o << " bitmap: " << moving_space_bitmap_->DumpMemAround(o);
1427                 VerifyObject(reinterpret_cast<mirror::Object*>(addr), verify_obj_callback);
1428               }
1429             }
1430             last_stride = addr;
1431             addr += stride_in_bytes;
1432             stride_count++;
1433           });
1434   DCHECK_LT(last_stride, start_addr + gPageSize);
1435   DCHECK_GT(stride_count, 0u);
1436   size_t obj_size = 0;
1437   uint32_t offset_within_obj = offset * kAlignment
1438                                - (reinterpret_cast<uint8_t*>(obj) - from_space_begin_);
1439   // First object
1440   if (offset_within_obj > 0) {
1441     mirror::Object* to_ref = reinterpret_cast<mirror::Object*>(start_addr - offset_within_obj);
1442     if (stride_count > 1) {
1443       RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/false> visitor(this,
1444                                                                          to_ref,
1445                                                                          start_addr,
1446                                                                          nullptr);
1447       obj_size = obj->VisitRefsForCompaction</*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(
1448               visitor, MemberOffset(offset_within_obj), MemberOffset(-1));
1449     } else {
1450       RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/true> visitor(this,
1451                                                                         to_ref,
1452                                                                         start_addr,
1453                                                                         start_addr + gPageSize);
1454       obj_size = obj->VisitRefsForCompaction</*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(
1455               visitor, MemberOffset(offset_within_obj), MemberOffset(offset_within_obj
1456                                                                      + gPageSize));
1457     }
1458     obj_size = RoundUp(obj_size, kAlignment);
1459     DCHECK_GT(obj_size, offset_within_obj)
1460         << "obj:" << obj << " class:" << obj->GetClass<kDefaultVerifyFlags, kWithFromSpaceBarrier>()
1461         << " to_addr:" << to_ref
1462         << " black-allocation-begin:" << reinterpret_cast<void*>(black_allocations_begin_)
1463         << " post-compact-end:" << reinterpret_cast<void*>(post_compact_end_)
1464         << " offset:" << offset * kAlignment << " class-after-obj-iter:"
1465         << (class_after_obj_iter_ != class_after_obj_map_.rend() ?
1466                 class_after_obj_iter_->first.AsMirrorPtr() :
1467                 nullptr)
1468         << " last-reclaimed-page:" << reinterpret_cast<void*>(last_reclaimed_page_)
1469         << " last-checked-reclaim-page-idx:" << last_checked_reclaim_page_idx_
1470         << " offset-of-last-idx:"
1471         << pre_compact_offset_moving_space_[last_checked_reclaim_page_idx_] * kAlignment
1472         << " first-obj-of-last-idx:"
1473         << first_objs_moving_space_[last_checked_reclaim_page_idx_].AsMirrorPtr();
1474 
1475     obj_size -= offset_within_obj;
1476     // If there is only one stride, then adjust last_stride_begin to the
1477     // end of the first object.
1478     if (stride_count == 1) {
1479       last_stride_begin += obj_size / kAlignment;
1480     }
1481   }
1482 
1483   // Except for the last page being compacted, the pages will have addr ==
1484   // start_addr + gPageSize.
1485   uint8_t* const end_addr = addr;
1486   addr = start_addr;
1487   size_t bytes_done = obj_size;
1488   // All strides except the last one can be updated without any boundary
1489   // checks.
1490   DCHECK_LE(addr, last_stride);
1491   size_t bytes_to_visit = last_stride - addr;
1492   DCHECK_LE(bytes_to_visit, gPageSize);
1493   while (bytes_to_visit > bytes_done) {
1494     mirror::Object* ref = reinterpret_cast<mirror::Object*>(addr + bytes_done);
1495     VerifyObject(ref, verify_obj_callback);
1496     RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false>
1497             visitor(this, ref, nullptr, nullptr);
1498     obj_size = ref->VisitRefsForCompaction(visitor, MemberOffset(0), MemberOffset(-1));
1499     obj_size = RoundUp(obj_size, kAlignment);
1500     bytes_done += obj_size;
1501   }
1502   // The last stride may have multiple objects in it, and we don't know where the
1503   // last object that crosses the page boundary starts; therefore, check
1504   // page-end in all of these objects. Also, we need to call
1505   // VisitRefsForCompaction() with from-space object as we fetch object size,
1506   // which in case of klass requires 'class_size_'.
1507   uint8_t* from_addr = from_space_begin_ + last_stride_begin * kAlignment;
1508   bytes_to_visit = end_addr - addr;
1509   DCHECK_LE(bytes_to_visit, gPageSize);
1510   while (bytes_to_visit > bytes_done) {
1511     mirror::Object* ref = reinterpret_cast<mirror::Object*>(addr + bytes_done);
1512     obj = reinterpret_cast<mirror::Object*>(from_addr);
1513     VerifyObject(ref, verify_obj_callback);
1514     RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/true>
1515             visitor(this, ref, nullptr, start_addr + gPageSize);
1516     obj_size = obj->VisitRefsForCompaction(visitor,
1517                                            MemberOffset(0),
1518                                            MemberOffset(end_addr - (addr + bytes_done)));
1519     obj_size = RoundUp(obj_size, kAlignment);
1520     DCHECK_GT(obj_size, 0u)
1521         << "from_addr:" << obj
1522         << " from-space-class:" << obj->GetClass<kDefaultVerifyFlags, kWithFromSpaceBarrier>()
1523         << " to_addr:" << ref
1524         << " black-allocation-begin:" << reinterpret_cast<void*>(black_allocations_begin_)
1525         << " post-compact-end:" << reinterpret_cast<void*>(post_compact_end_)
1526         << " offset:" << offset * kAlignment << " bytes_done:" << bytes_done
1527         << " class-after-obj-iter:"
1528         << (class_after_obj_iter_ != class_after_obj_map_.rend() ?
1529                 class_after_obj_iter_->first.AsMirrorPtr() :
1530                 nullptr)
1531         << " last-reclaimed-page:" << reinterpret_cast<void*>(last_reclaimed_page_)
1532         << " last-checked-reclaim-page-idx:" << last_checked_reclaim_page_idx_
1533         << " offset-of-last-idx:"
1534         << pre_compact_offset_moving_space_[last_checked_reclaim_page_idx_] * kAlignment
1535         << " first-obj-of-last-idx:"
1536         << first_objs_moving_space_[last_checked_reclaim_page_idx_].AsMirrorPtr();
1537 
1538     from_addr += obj_size;
1539     bytes_done += obj_size;
1540   }
1541   // The last page that we compact may have some bytes left untouched at the
1542   // end; we should zero them, as the kernel copies at page granularity.
1543   if (needs_memset_zero && UNLIKELY(bytes_done < gPageSize)) {
1544     std::memset(addr + bytes_done, 0x0, gPageSize - bytes_done);
1545   }
1546 }
1547 
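// [Editor's illustrative sketch, not part of the original source.] The core move in
// CompactPage() above: every maximal run of set bits in the live-words bitmap is one
// "stride" of contiguous live data, which is memcpy'd as a unit so that live strides end
// up packed back-to-back in the destination page. A standalone analog over a single
// 64-bit bitmap word, with hypothetical names, 'word_size' standing in for kAlignment,
// and assuming <cstring> and <cstdint> are available.
namespace live_stride_example {
inline void CopyLiveStrides(uint64_t live_bits,
                            const uint8_t* src,
                            uint8_t* dst,
                            size_t word_size) {
  size_t i = 0;
  while (i < 64) {
    if ((live_bits >> i) & 1) {
      size_t stride_begin = i;
      while (i < 64 && ((live_bits >> i) & 1)) {
        i++;  // extend the stride over consecutive live words
      }
      size_t bytes = (i - stride_begin) * word_size;
      memcpy(dst, src + stride_begin * word_size, bytes);
      dst += bytes;  // strides are packed together at the destination (compaction)
    } else {
      i++;  // skip dead words
    }
  }
}
}  // namespace live_stride_example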
1548 // We store the starting point (pre_compact_page - first_obj) and first-chunk's
1549 // size. If more TLAB(s) started in this page, then those chunks are identified
1550 // using the mark bitmap. All this info is prepared in UpdateMovingSpaceBlackAllocations().
1551 // If we find a set bit in the bitmap, then we copy the remaining page and then
1552 // use the bitmap to visit each object for updating references.
1553 void MarkCompact::SlideBlackPage(mirror::Object* first_obj,
1554                                  mirror::Object* next_page_first_obj,
1555                                  uint32_t first_chunk_size,
1556                                  uint8_t* const pre_compact_page,
1557                                  uint8_t* dest,
1558                                  bool needs_memset_zero) {
1559   DCHECK(IsAlignedParam(pre_compact_page, gPageSize));
1560   size_t bytes_copied;
1561   uint8_t* src_addr = reinterpret_cast<uint8_t*>(GetFromSpaceAddr(first_obj));
1562   uint8_t* pre_compact_addr = reinterpret_cast<uint8_t*>(first_obj);
1563   uint8_t* const pre_compact_page_end = pre_compact_page + gPageSize;
1564   uint8_t* const dest_page_end = dest + gPageSize;
1565 
1566   auto verify_obj_callback = [&] (std::ostream& os) {
1567                                os << " first_obj=" << first_obj
1568                                   << " next_page_first_obj=" << next_page_first_obj
1569                                   << " first_chunk_size=" << first_chunk_size
1570                                   << " dest=" << static_cast<void*>(dest)
1571                                   << " pre_compact_page="
1572                                   << static_cast<void* const>(pre_compact_page);
1573                              };
1574   // We have an empty portion at the beginning of the page. Zero it.
1575   if (pre_compact_addr > pre_compact_page) {
1576     bytes_copied = pre_compact_addr - pre_compact_page;
1577     DCHECK_LT(bytes_copied, gPageSize);
1578     if (needs_memset_zero) {
1579       std::memset(dest, 0x0, bytes_copied);
1580     }
1581     dest += bytes_copied;
1582   } else {
1583     bytes_copied = 0;
1584     size_t offset = pre_compact_page - pre_compact_addr;
1585     pre_compact_addr = pre_compact_page;
1586     src_addr += offset;
1587     DCHECK(IsAlignedParam(src_addr, gPageSize));
1588   }
1589   // Copy the first chunk of live words
1590   std::memcpy(dest, src_addr, first_chunk_size);
1591   // Update references in the first chunk. Use object size to find next object.
1592   {
1593     size_t bytes_to_visit = first_chunk_size;
1594     size_t obj_size;
1595     // The first object started in some previous page. So we need to check the
1596     // beginning.
1597     DCHECK_LE(reinterpret_cast<uint8_t*>(first_obj), pre_compact_addr);
1598     size_t offset = pre_compact_addr - reinterpret_cast<uint8_t*>(first_obj);
1599     if (bytes_copied == 0 && offset > 0) {
1600       mirror::Object* to_obj = reinterpret_cast<mirror::Object*>(dest - offset);
1601       mirror::Object* from_obj = reinterpret_cast<mirror::Object*>(src_addr - offset);
1602       // If the next page's first-obj is in this page or is nullptr, then we don't
1603       // need to check the end boundary.
1604       if (next_page_first_obj == nullptr
1605           || (first_obj != next_page_first_obj
1606               && reinterpret_cast<uint8_t*>(next_page_first_obj) <= pre_compact_page_end)) {
1607         RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/false> visitor(this,
1608                                                                            to_obj,
1609                                                                            dest,
1610                                                                            nullptr);
1611         obj_size = from_obj->VisitRefsForCompaction<
1612                 /*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(visitor,
1613                                                                    MemberOffset(offset),
1614                                                                    MemberOffset(-1));
1615       } else {
1616         RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/true> visitor(this,
1617                                                                           to_obj,
1618                                                                           dest,
1619                                                                           dest_page_end);
1620         obj_size = from_obj->VisitRefsForCompaction<
1621                 /*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(visitor,
1622                                                                    MemberOffset(offset),
1623                                                                    MemberOffset(offset
1624                                                                                 + gPageSize));
1625         if (first_obj == next_page_first_obj) {
1626           // First object is the only object on this page. So there's nothing else left to do.
1627           return;
1628         }
1629       }
1630       obj_size = RoundUp(obj_size, kAlignment);
1631       obj_size -= offset;
1632       dest += obj_size;
1633       bytes_to_visit -= obj_size;
1634     }
1635     bytes_copied += first_chunk_size;
1636     // If the last object in this page is next_page_first_obj, then we need to check the end boundary.
1637     bool check_last_obj = false;
1638     if (next_page_first_obj != nullptr
1639         && reinterpret_cast<uint8_t*>(next_page_first_obj) < pre_compact_page_end
1640         && bytes_copied == gPageSize) {
1641       size_t diff = pre_compact_page_end - reinterpret_cast<uint8_t*>(next_page_first_obj);
1642       DCHECK_LE(diff, gPageSize);
1643       DCHECK_LE(diff, bytes_to_visit);
1644       bytes_to_visit -= diff;
1645       check_last_obj = true;
1646     }
1647     while (bytes_to_visit > 0) {
1648       mirror::Object* dest_obj = reinterpret_cast<mirror::Object*>(dest);
1649       VerifyObject(dest_obj, verify_obj_callback);
1650       RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false> visitor(this,
1651                                                                           dest_obj,
1652                                                                           nullptr,
1653                                                                           nullptr);
1654       obj_size = dest_obj->VisitRefsForCompaction(visitor, MemberOffset(0), MemberOffset(-1));
1655       obj_size = RoundUp(obj_size, kAlignment);
1656       bytes_to_visit -= obj_size;
1657       dest += obj_size;
1658     }
1659     DCHECK_EQ(bytes_to_visit, 0u);
1660     if (check_last_obj) {
1661       mirror::Object* dest_obj = reinterpret_cast<mirror::Object*>(dest);
1662       VerifyObject(dest_obj, verify_obj_callback);
1663       RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/true> visitor(this,
1664                                                                          dest_obj,
1665                                                                          nullptr,
1666                                                                          dest_page_end);
1667       mirror::Object* obj = GetFromSpaceAddr(next_page_first_obj);
1668       obj->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor,
1669                                                           MemberOffset(0),
1670                                                           MemberOffset(dest_page_end - dest));
1671       return;
1672     }
1673   }
1674 
1675   // Probably a TLAB finished on this page and/or a new TLAB started as well.
1676   if (bytes_copied < gPageSize) {
1677     src_addr += first_chunk_size;
1678     pre_compact_addr += first_chunk_size;
1679     // Use the mark-bitmap to identify where objects are. First call
1680     // VisitMarkedRange for only the first marked bit. If no object is found,
1681     // zero the remaining bytes and return; otherwise memcpy the rest of the page.
1682     // Then call VisitMarkedRange for all marked bits *after* the one found in
1683     // this invocation, this time to visit references.
1684     uintptr_t start_visit = reinterpret_cast<uintptr_t>(pre_compact_addr);
1685     uintptr_t page_end = reinterpret_cast<uintptr_t>(pre_compact_page_end);
1686     mirror::Object* found_obj = nullptr;
1687     moving_space_bitmap_->VisitMarkedRange</*kVisitOnce*/true>(start_visit,
1688                                                                 page_end,
1689                                                                 [&found_obj](mirror::Object* obj) {
1690                                                                   found_obj = obj;
1691                                                                 });
1692     size_t remaining_bytes = gPageSize - bytes_copied;
1693     if (found_obj == nullptr) {
1694       if (needs_memset_zero) {
1695         // No more black objects in this page. Zero the remaining bytes and return.
1696         std::memset(dest, 0x0, remaining_bytes);
1697       }
1698       return;
1699     }
1700     // Copy everything in this page, which includes any zeroed regions
1701     // in-between.
1702     std::memcpy(dest, src_addr, remaining_bytes);
1703     DCHECK_LT(reinterpret_cast<uintptr_t>(found_obj), page_end);
1704     moving_space_bitmap_->VisitMarkedRange(
1705             reinterpret_cast<uintptr_t>(found_obj) + mirror::kObjectHeaderSize,
1706             page_end,
1707             [&found_obj, pre_compact_addr, dest, this, verify_obj_callback] (mirror::Object* obj)
1708             REQUIRES_SHARED(Locks::mutator_lock_) {
1709               ptrdiff_t diff = reinterpret_cast<uint8_t*>(found_obj) - pre_compact_addr;
1710               mirror::Object* ref = reinterpret_cast<mirror::Object*>(dest + diff);
1711               VerifyObject(ref, verify_obj_callback);
1712               RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false>
1713                       visitor(this, ref, nullptr, nullptr);
1714               ref->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor,
1715                                                                   MemberOffset(0),
1716                                                                   MemberOffset(-1));
1717               // Remember for next round.
1718               found_obj = obj;
1719             });
1720     // found_obj may have been updated in VisitMarkedRange. Visit the last found
1721     // object.
1722     DCHECK_GT(reinterpret_cast<uint8_t*>(found_obj), pre_compact_addr);
1723     DCHECK_LT(reinterpret_cast<uintptr_t>(found_obj), page_end);
1724     ptrdiff_t diff = reinterpret_cast<uint8_t*>(found_obj) - pre_compact_addr;
1725     mirror::Object* dest_obj = reinterpret_cast<mirror::Object*>(dest + diff);
1726     VerifyObject(dest_obj, verify_obj_callback);
1727     RefsUpdateVisitor</*kCheckBegin*/ false, /*kCheckEnd*/ true> visitor(
1728         this, dest_obj, nullptr, dest_page_end);
1729     // The last object could overlap with the next page. If it happens to be a
1730     // class, then we may access something (like static-fields' offsets) which
1731     // is on the next page. Therefore, use from-space's reference.
1732     mirror::Object* obj = GetFromSpaceAddr(found_obj);
1733     obj->VisitRefsForCompaction</*kFetchObjSize*/ false>(
1734         visitor, MemberOffset(0), MemberOffset(page_end - reinterpret_cast<uintptr_t>(found_obj)));
1735   }
1736 }
1737 
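// [Editor's illustrative sketch, not part of the original source.] The tail of
// SlideBlackPage() walks the mark bitmap in a deferred manner: each found object is
// visited only once the *next* marked object is known, so that only the final object of
// the page needs the end-boundary-checking visitor. A standalone analog over one 64-bit
// bitmap word, with hypothetical names and assuming the GCC/Clang __builtin_ctzll builtin.
namespace marked_visit_example {
// Visits each set bit; the callback is told whether the bit is the last one so the
// caller can bound that final visit at the page end, as the code above does.
template <typename Visitor>
inline void VisitSetBitsDeferred(uint64_t bits, Visitor&& visit) {
  if (bits == 0) {
    return;  // no black objects: the caller just zeroes the rest of the page.
  }
  int pending = __builtin_ctzll(bits);  // first marked slot
  bits &= bits - 1;                     // clear it
  while (bits != 0) {
    visit(pending, /*is_last=*/false);  // safe: another object follows on this page
    pending = __builtin_ctzll(bits);
    bits &= bits - 1;
  }
  visit(pending, /*is_last=*/true);     // last object may spill past the page end
}
}  // namespace marked_visit_example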
1738 template <uint32_t kYieldMax = 5, uint64_t kSleepUs = 10>
1739 static void BackOff(uint32_t i) {
1740   // TODO: Consider adding x86 PAUSE and/or ARM YIELD here.
1741   if (i <= kYieldMax) {
1742     sched_yield();
1743   } else {
1744     // nanosleep is not in the async-signal-safe list, but bionic implements it
1745     // with a pure system call, so it should be fine.
1746     NanoSleep(kSleepUs * 1000 * (i - kYieldMax));
1747   }
1748 }
1749 
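// [Editor's illustrative sketch, not part of the original source.] Typical use of the
// helper above: poll a condition with increasing patience, yielding for the first few
// rounds and then sleeping in growing multiples of kSleepUs. This is the same shape
// MapMovingSpacePages() uses below while waiting for a page to become mapped. The atomic
// flag here is hypothetical; the snippet assumes <atomic>.
namespace backoff_example {
inline void WaitUntilSet(const std::atomic<bool>& flag) {
  for (uint32_t i = 0; !flag.load(std::memory_order_acquire); i++) {
    BackOff(i);  // sched_yield() while i <= 5, then sleep 10us, 20us, ...
  }
}
}  // namespace backoff_example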
1750 size_t MarkCompact::ZeropageIoctl(void* addr,
1751                                   size_t length,
1752                                   bool tolerate_eexist,
1753                                   bool tolerate_enoent) {
1754   int32_t backoff_count = -1;
1755   int32_t max_backoff = 10;  // max native priority.
1756   struct uffdio_zeropage uffd_zeropage;
1757   DCHECK(IsAlignedParam(addr, gPageSize));
1758   uffd_zeropage.range.start = reinterpret_cast<uintptr_t>(addr);
1759   uffd_zeropage.range.len = length;
1760   uffd_zeropage.mode = gUffdSupportsMmapTrylock ? UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK : 0;
1761   while (true) {
1762     uffd_zeropage.zeropage = 0;
1763     int ret = ioctl(uffd_, UFFDIO_ZEROPAGE, &uffd_zeropage);
1764     if (ret == 0) {
1765       DCHECK_EQ(uffd_zeropage.zeropage, static_cast<ssize_t>(length));
1766       return length;
1767     } else if (errno == EAGAIN) {
1768       if (uffd_zeropage.zeropage > 0) {
1769         // Contention was observed after acquiring mmap_lock. But the first page
1770         // is already done, which is what we care about.
1771         DCHECK(IsAlignedParam(uffd_zeropage.zeropage, gPageSize));
1772         DCHECK_GE(uffd_zeropage.zeropage, static_cast<ssize_t>(gPageSize));
1773         return uffd_zeropage.zeropage;
1774       } else if (uffd_zeropage.zeropage < 0) {
1775         // mmap_read_trylock() failed due to contention. Back-off and retry.
1776         DCHECK_EQ(uffd_zeropage.zeropage, -EAGAIN);
1777         if (backoff_count == -1) {
1778           int prio = Thread::Current()->GetNativePriority();
1779           DCHECK(prio > 0 && prio <= 10) << prio;
1780           max_backoff -= prio;
1781           backoff_count = 0;
1782         }
1783         if (backoff_count < max_backoff) {
1784           // Using 3 to align 'normal' priority threads with sleep.
1785           BackOff</*kYieldMax=*/3, /*kSleepUs=*/1000>(backoff_count++);
1786         } else {
1787           uffd_zeropage.mode = 0;
1788         }
1789       }
1790     } else if (tolerate_eexist && errno == EEXIST) {
1791       // Ioctl returns the number of bytes it mapped. The page on which EEXIST occurred
1792       // wouldn't be included in it.
1793       return uffd_zeropage.zeropage > 0 ? uffd_zeropage.zeropage + gPageSize : gPageSize;
1794     } else {
1795       CHECK(tolerate_enoent && errno == ENOENT)
1796           << "ioctl_userfaultfd: zeropage failed: " << strerror(errno) << ". addr:" << addr;
1797       return 0;
1798     }
1799   }
1800 }
1801 
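// [Editor's illustrative sketch, not part of the original source.] The bare ioctl that
// the wrapper above drives, without the MMAP_TRYLOCK/back-off handling: zero-fill one
// page of a userfaultfd-registered range. 'uffd' and 'addr' are assumed to be a valid
// userfaultfd descriptor and a page-aligned address inside a registered region; the
// snippet assumes <linux/userfaultfd.h> and <sys/ioctl.h>.
namespace uffd_zeropage_example {
inline bool ZeroOnePage(int uffd, void* addr, size_t page_size) {
  struct uffdio_zeropage req;
  req.range.start = reinterpret_cast<uintptr_t>(addr);
  req.range.len = page_size;
  req.mode = 0;      // block on mmap_lock instead of UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK
  req.zeropage = 0;  // out-param: bytes actually zero-mapped by the kernel
  return ioctl(uffd, UFFDIO_ZEROPAGE, &req) == 0 &&
         req.zeropage == static_cast<ssize_t>(page_size);
}
}  // namespace uffd_zeropage_example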
1802 size_t MarkCompact::CopyIoctl(
1803     void* dst, void* buffer, size_t length, bool return_on_contention, bool tolerate_enoent) {
1804   int32_t backoff_count = -1;
1805   int32_t max_backoff = 10;  // max native priority.
1806   struct uffdio_copy uffd_copy;
1807   uffd_copy.mode = gUffdSupportsMmapTrylock ? UFFDIO_COPY_MODE_MMAP_TRYLOCK : 0;
1808   uffd_copy.src = reinterpret_cast<uintptr_t>(buffer);
1809   uffd_copy.dst = reinterpret_cast<uintptr_t>(dst);
1810   uffd_copy.len = length;
1811   uffd_copy.copy = 0;
1812   while (true) {
1813     int ret = ioctl(uffd_, UFFDIO_COPY, &uffd_copy);
1814     if (ret == 0) {
1815       DCHECK_EQ(uffd_copy.copy, static_cast<ssize_t>(length));
1816       break;
1817     } else if (errno == EAGAIN) {
1818       // Contention observed.
1819       DCHECK_NE(uffd_copy.copy, 0);
1820       if (uffd_copy.copy > 0) {
1821         // Contention was observed after acquiring mmap_lock.
1822         DCHECK(IsAlignedParam(uffd_copy.copy, gPageSize));
1823         DCHECK_GE(uffd_copy.copy, static_cast<ssize_t>(gPageSize));
1824         break;
1825       } else {
1826         // mmap_read_trylock() failed due to contention.
1827         DCHECK_EQ(uffd_copy.copy, -EAGAIN);
1828         uffd_copy.copy = 0;
1829         if (return_on_contention) {
1830           break;
1831         }
1832       }
1833       if (backoff_count == -1) {
1834         int prio = Thread::Current()->GetNativePriority();
1835         DCHECK(prio > 0 && prio <= 10) << prio;
1836         max_backoff -= prio;
1837         backoff_count = 0;
1838       }
1839       if (backoff_count < max_backoff) {
1840         // Using 3 to align 'normal' priority threads with sleep.
1841         BackOff</*kYieldMax=*/3, /*kSleepUs=*/1000>(backoff_count++);
1842       } else {
1843         uffd_copy.mode = 0;
1844       }
1845     } else if (errno == EEXIST) {
1846       DCHECK_NE(uffd_copy.copy, 0);
1847       if (uffd_copy.copy < 0) {
1848         uffd_copy.copy = 0;
1849       }
1850       // Ioctl returns the number of bytes it mapped. The page on which EEXIST occurred
1851       // wouldn't be included in it.
1852       uffd_copy.copy += gPageSize;
1853       break;
1854     } else {
1855       CHECK(tolerate_enoent && errno == ENOENT)
1856           << "ioctl_userfaultfd: copy failed: " << strerror(errno) << ". src:" << buffer
1857           << " dst:" << dst;
1858       return uffd_copy.copy > 0 ? uffd_copy.copy : 0;
1859     }
1860   }
1861   return uffd_copy.copy;
1862 }
1863 
1864 template <int kMode, typename CompactionFn>
1865 bool MarkCompact::DoPageCompactionWithStateChange(size_t page_idx,
1866                                                   uint8_t* to_space_page,
1867                                                   uint8_t* page,
1868                                                   bool map_immediately,
1869                                                   CompactionFn func) {
1870   uint32_t expected_state = static_cast<uint8_t>(PageState::kUnprocessed);
1871   uint32_t desired_state = static_cast<uint8_t>(map_immediately ? PageState::kProcessingAndMapping :
1872                                                                   PageState::kProcessing);
1873   // In the concurrent case (kMode != kFallbackMode) we need to ensure that the update
1874   // to moving_spaces_status_[page_idx] is released before the contents of the page are
1875   // to moving_pages_status_[page_idx] is released before the contents of the page are
1876   //
1877   // We need acquire ordering here to ensure that when the CAS fails, another thread
1878   // has completed processing the page, which is guaranteed by the release below.
1879   if (kMode == kFallbackMode || moving_pages_status_[page_idx].compare_exchange_strong(
1880                                     expected_state, desired_state, std::memory_order_acquire)) {
1881     func();
1882     if (kMode == kCopyMode) {
1883       if (map_immediately) {
1884         CopyIoctl(to_space_page,
1885                   page,
1886                   gPageSize,
1887                   /*return_on_contention=*/false,
1888                   /*tolerate_enoent=*/false);
1889         // Store is sufficient as no other thread could modify the status at this
1890         // point. Relaxed order is sufficient as the ioctl will act as a fence.
1891         moving_pages_status_[page_idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
1892                                              std::memory_order_relaxed);
1893       } else {
1894         // Add the src page's index in the status word.
1895         DCHECK(from_space_map_.HasAddress(page));
1896         DCHECK_LE(static_cast<size_t>(page - from_space_begin_),
1897                   std::numeric_limits<uint32_t>::max());
1898         uint32_t store_val = page - from_space_begin_;
1899         DCHECK_EQ(store_val & kPageStateMask, 0u);
1900         store_val |= static_cast<uint8_t>(PageState::kProcessed);
1901         // Store is sufficient as no other thread would modify the status at this point.
1902         moving_pages_status_[page_idx].store(store_val, std::memory_order_release);
1903       }
1904     }
1905     return true;
1906   } else {
1907     // Only GC thread could have set the state to Processed.
1908     DCHECK_NE(expected_state, static_cast<uint8_t>(PageState::kProcessed));
1909     return false;
1910   }
1911 }
1912 
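// [Editor's illustrative sketch, not part of the original source.] The claiming protocol
// above in miniature: one atomic word per page holds the state in its low bits and, once
// processed, the page-aligned from-space offset of the compacted contents in the high
// bits. Whoever wins the CAS from kUnprocessed does the work; losers learn, via the
// acquire/release pairing, that someone else did or will. The enum values and the 8-bit
// state mask are assumptions for this sketch; assumes <atomic> and <cstdint>.
namespace page_state_example {
enum class State : uint8_t { kUnprocessed = 0, kProcessing = 1, kProcessed = 2 };
constexpr uint32_t kStateMask = 0xff;

inline bool TryClaimPage(std::atomic<uint32_t>& status) {
  uint32_t expected = static_cast<uint32_t>(State::kUnprocessed);
  return status.compare_exchange_strong(expected,
                                        static_cast<uint32_t>(State::kProcessing),
                                        std::memory_order_acquire);
}

inline void PublishProcessed(std::atomic<uint32_t>& status, uint32_t from_space_offset) {
  // 'from_space_offset' is page-aligned, so its low bits are free for the state.
  // Release so the compacted page contents are visible before the new state is.
  status.store(from_space_offset | static_cast<uint32_t>(State::kProcessed),
               std::memory_order_release);
}
}  // namespace page_state_example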
1913 bool MarkCompact::FreeFromSpacePages(size_t cur_page_idx, int mode, size_t end_idx_for_mapping) {
1914   // Thanks to sliding compaction, bump-pointer allocations, and reverse
1915   // compaction (see CompactMovingSpace), the logic here is pretty simple: find
1916   // the to-space page up to which compaction has finished; all the from-space
1917   // pages corresponding to this page onwards can be freed. There are some corner
1918   // cases to be taken care of, which are described below.
1919   size_t idx = last_checked_reclaim_page_idx_;
1920   // Find the to-space page up to which the corresponding from-space pages can be
1921   // freed.
1922   for (; idx > cur_page_idx; idx--) {
1923     PageState state = GetMovingPageState(idx - 1);
1924     if (state == PageState::kMutatorProcessing) {
1925       // Some mutator is working on the page.
1926       break;
1927     }
1928     DCHECK(state >= PageState::kProcessed ||
1929            (state == PageState::kUnprocessed &&
1930             (mode == kFallbackMode || idx > moving_first_objs_count_)));
1931   }
1932   DCHECK_LE(idx, last_checked_reclaim_page_idx_);
1933   if (idx == last_checked_reclaim_page_idx_) {
1934     // Nothing to do.
1935     return false;
1936   }
1937 
1938   uint8_t* reclaim_begin;
1939   uint8_t* idx_addr;
1940   // Calculate the first from-space page to be freed using 'idx'. If the
1941   // first-object of the idx'th to-space page started before the corresponding
1942   // from-space page, which is almost always the case in the compaction portion
1943   // of the moving-space, then it indicates that the subsequent pages that are
1944   // yet to be compacted will need the from-space pages. Therefore, find the page
1945   // (from the already compacted pages) whose first-object is different from
1946   // ours. All the from-space pages starting from that one are safe to be
1947   // removed. Please note that this iteration is not expected to be long in
1948   // normal cases as objects are smaller than page size.
1949   if (idx >= moving_first_objs_count_) {
1950     // black-allocated portion of the moving-space
1951     idx_addr = black_allocations_begin_ + (idx - moving_first_objs_count_) * gPageSize;
1952     reclaim_begin = idx_addr;
1953     mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
1954     if (first_obj != nullptr && reinterpret_cast<uint8_t*>(first_obj) < reclaim_begin) {
1955       size_t idx_len = moving_first_objs_count_ + black_page_count_;
1956       for (size_t i = idx + 1; i < idx_len; i++) {
1957         mirror::Object* obj = first_objs_moving_space_[i].AsMirrorPtr();
1958         // A null first-object indicates that the corresponding to-space page is
1959         // not used yet. So we can compute its from-space page and use that.
1960         if (obj != first_obj) {
1961           reclaim_begin = obj != nullptr
1962                           ? AlignUp(reinterpret_cast<uint8_t*>(obj), gPageSize)
1963                           : (black_allocations_begin_ + (i - moving_first_objs_count_) * gPageSize);
1964           break;
1965         }
1966       }
1967     }
1968   } else {
1969     DCHECK_GE(pre_compact_offset_moving_space_[idx], 0u);
1970     idx_addr = moving_space_begin_ + idx * gPageSize;
1971     if (idx_addr >= black_dense_end_) {
1972       idx_addr = moving_space_begin_ + pre_compact_offset_moving_space_[idx] * kAlignment;
1973     }
1974     reclaim_begin = idx_addr;
1975     DCHECK_LE(reclaim_begin, black_allocations_begin_);
1976     mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
1977     if (first_obj != nullptr) {
1978       if (reinterpret_cast<uint8_t*>(first_obj) < reclaim_begin) {
1979         DCHECK_LT(idx, moving_first_objs_count_);
1980         mirror::Object* obj = first_obj;
1981         for (size_t i = idx + 1; i < moving_first_objs_count_; i++) {
1982           obj = first_objs_moving_space_[i].AsMirrorPtr();
1983           if (obj == nullptr) {
1984             reclaim_begin = moving_space_begin_ + i * gPageSize;
1985             break;
1986           } else if (first_obj != obj) {
1987             DCHECK_LT(first_obj, obj);
1988             DCHECK_LT(reclaim_begin, reinterpret_cast<uint8_t*>(obj));
1989             reclaim_begin = reinterpret_cast<uint8_t*>(obj);
1990             break;
1991           }
1992         }
1993         if (obj == first_obj) {
1994           reclaim_begin = black_allocations_begin_;
1995         }
1996       }
1997     }
1998     reclaim_begin = AlignUp(reclaim_begin, gPageSize);
1999   }
2000 
2001   DCHECK_NE(reclaim_begin, nullptr);
2002   DCHECK_ALIGNED_PARAM(reclaim_begin, gPageSize);
2003   DCHECK_ALIGNED_PARAM(last_reclaimed_page_, gPageSize);
2004   // Check if the 'class_after_obj_map_' map allows pages to be freed.
2005   for (; class_after_obj_iter_ != class_after_obj_map_.rend(); class_after_obj_iter_++) {
2006     mirror::Object* klass = class_after_obj_iter_->first.AsMirrorPtr();
2007     mirror::Class* from_klass = static_cast<mirror::Class*>(GetFromSpaceAddr(klass));
2008     // Check with class' end to ensure that, if required, the entire class survives.
2009     uint8_t* klass_end = reinterpret_cast<uint8_t*>(klass) + from_klass->SizeOf<kVerifyNone>();
2010     DCHECK_LE(klass_end, last_reclaimed_page_);
2011     if (reinterpret_cast<uint8_t*>(klass_end) >= reclaim_begin) {
2012       // Found a class which is in the reclaim range.
2013       if (reinterpret_cast<uint8_t*>(class_after_obj_iter_->second.AsMirrorPtr()) < idx_addr) {
2014         // Its lowest-address object is not compacted yet. Reclaim starting from
2015         // the end of this class.
2016         reclaim_begin = AlignUp(klass_end, gPageSize);
2017       } else {
2018         // Continue consuming pairs wherein the lowest address object has already
2019         // been compacted.
2020         continue;
2021       }
2022     }
2023     // All the remaining class (and thereby corresponding object) addresses are
2024     // lower than the reclaim range.
2025     break;
2026   }
2027   bool all_mapped = mode == kFallbackMode;
2028   ssize_t size = last_reclaimed_page_ - reclaim_begin;
2029   if (size > kMinFromSpaceMadviseSize) {
2030     // Map all the pages in the range.
2031     if (mode == kCopyMode && cur_page_idx < end_idx_for_mapping) {
2032       if (MapMovingSpacePages(cur_page_idx,
2033                               end_idx_for_mapping,
2034                               /*from_fault=*/false,
2035                               /*return_on_contention=*/true,
2036                               /*tolerate_enoent=*/false) == end_idx_for_mapping - cur_page_idx) {
2037         all_mapped = true;
2038       }
2039     } else {
2040       // This is for the black-allocation pages so that madvise is not missed.
2041       all_mapped = true;
2042     }
2043     // If not all pages are mapped, then take it as a hint that mmap_lock is
2044     // contended and hence don't madvise as that also needs the same lock.
2045     if (all_mapped) {
2046       // Retain a few pages for subsequent compactions.
2047       const ssize_t gBufferPages = 4 * gPageSize;
2048       DCHECK_LT(gBufferPages, kMinFromSpaceMadviseSize);
2049       size -= gBufferPages;
2050       uint8_t* addr = last_reclaimed_page_ - size;
2051       CHECK_EQ(madvise(addr + from_space_slide_diff_, size, MADV_DONTNEED), 0)
2052           << "madvise of from-space failed: " << strerror(errno);
2053       last_reclaimed_page_ = addr;
2054       cur_reclaimable_page_ = addr;
2055     }
2056   }
2057   last_reclaimable_page_ = std::min(reclaim_begin, last_reclaimable_page_);
2058   last_checked_reclaim_page_idx_ = idx;
2059   return all_mapped;
2060 }
2061 
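// [Editor's illustrative sketch, not part of the original source.] The actual release of
// from-space memory above comes down to a single MADV_DONTNEED over the page-aligned
// range that is no longer needed as a compaction source. Standalone form with
// hypothetical names; 'begin' and 'length' are assumed page-aligned and <sys/mman.h> is
// assumed available.
namespace reclaim_example {
inline bool ReleaseRange(uint8_t* begin, size_t length) {
  // MADV_DONTNEED drops the backing pages immediately; a later read of the range (which
  // compaction does not perform for already-freed source pages) would see zero pages.
  return length == 0 || madvise(begin, length, MADV_DONTNEED) == 0;
}
}  // namespace reclaim_example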
2062 template <int kMode>
2063 void MarkCompact::CompactMovingSpace(uint8_t* page) {
2064   // For every page we have a starting object, which may have started in some
2065   // preceding page, and an offset within that object from where we must start
2066   // copying.
2067   // Consult the live-words bitmap to copy all contiguously live words at a
2068   // time. These words may constitute multiple objects. To avoid the need for
2069   // consulting the mark-bitmap to find where the next live object starts, we
2070   // use the object-size returned by VisitRefsForCompaction.
2071   //
2072   // We do the compaction in reverse direction so that the pages containing
2073   // TLAB and latest allocations are processed first.
2074   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
2075   size_t page_status_arr_len = moving_first_objs_count_ + black_page_count_;
2076   size_t idx = page_status_arr_len;
2077   size_t black_dense_end_idx = (black_dense_end_ - moving_space_begin_) / gPageSize;
2078   uint8_t* to_space_end = moving_space_begin_ + page_status_arr_len * gPageSize;
2079   uint8_t* pre_compact_page = black_allocations_begin_ + (black_page_count_ * gPageSize);
2080 
2081   DCHECK(IsAlignedParam(pre_compact_page, gPageSize));
2082 
2083   // These variables are maintained by FreeFromSpacePages().
2084   last_reclaimed_page_ = pre_compact_page;
2085   last_reclaimable_page_ = last_reclaimed_page_;
2086   cur_reclaimable_page_ = last_reclaimed_page_;
2087   last_checked_reclaim_page_idx_ = idx;
2088   class_after_obj_iter_ = class_after_obj_map_.rbegin();
2089   // Allocated-black pages
2090   mirror::Object* next_page_first_obj = nullptr;
2091   while (idx > moving_first_objs_count_) {
2092     idx--;
2093     pre_compact_page -= gPageSize;
2094     to_space_end -= gPageSize;
2095     if (kMode == kFallbackMode) {
2096       page = to_space_end;
2097     }
2098     mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2099     uint32_t first_chunk_size = black_alloc_pages_first_chunk_size_[idx];
2100     if (first_obj != nullptr) {
2101       DoPageCompactionWithStateChange<kMode>(idx,
2102                                              to_space_end,
2103                                              page,
2104                                              /*map_immediately=*/true,
2105                                              [&]() REQUIRES_SHARED(Locks::mutator_lock_) {
2106                                                SlideBlackPage(first_obj,
2107                                                               next_page_first_obj,
2108                                                               first_chunk_size,
2109                                                               pre_compact_page,
2110                                                               page,
2111                                                               kMode == kCopyMode);
2112                                              });
2113       // We are sliding here, so no point attempting to madvise for every
2114       // page. Wait for enough pages to be done.
2115       if (idx % DivideByPageSize(kMinFromSpaceMadviseSize) == 0) {
2116         FreeFromSpacePages(idx, kMode, /*end_idx_for_mapping=*/0);
2117       }
2118     }
2119     next_page_first_obj = first_obj;
2120   }
2121   DCHECK_EQ(pre_compact_page, black_allocations_begin_);
2122   // Reserved page to be used if we can't find any reclaimable page for processing.
2123   uint8_t* reserve_page = page;
2124   size_t end_idx_for_mapping = idx;
2125   while (idx > black_dense_end_idx) {
2126     idx--;
2127     to_space_end -= gPageSize;
2128     if (kMode == kFallbackMode) {
2129       page = to_space_end;
2130     } else {
2131       DCHECK_EQ(kMode, kCopyMode);
2132       if (cur_reclaimable_page_ > last_reclaimable_page_) {
2133         cur_reclaimable_page_ -= gPageSize;
2134         page = cur_reclaimable_page_ + from_space_slide_diff_;
2135       } else {
2136         page = reserve_page;
2137       }
2138     }
2139     mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2140     bool success = DoPageCompactionWithStateChange<kMode>(
2141         idx,
2142         to_space_end,
2143         page,
2144         /*map_immediately=*/page == reserve_page,
2145         [&]() REQUIRES_SHARED(Locks::mutator_lock_) {
2146           CompactPage(first_obj, pre_compact_offset_moving_space_[idx], page, kMode == kCopyMode);
2147         });
2148     if (kMode == kCopyMode && (!success || page == reserve_page) && end_idx_for_mapping - idx > 1) {
2149       // Map the pages at the following addresses, as they can't be mapped together with
2150       // the pages yet to be compacted, since their src-side pages won't be contiguous.
2151       MapMovingSpacePages(idx + 1,
2152                           end_idx_for_mapping,
2153                           /*from_fault=*/false,
2154                           /*return_on_contention=*/true,
2155                           /*tolerate_enoent=*/false);
2156     }
2157     if (FreeFromSpacePages(idx, kMode, end_idx_for_mapping)) {
2158       end_idx_for_mapping = idx;
2159     }
2160   }
2161   while (idx > 0) {
2162     idx--;
2163     to_space_end -= gPageSize;
2164     mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2165     if (first_obj != nullptr) {
2166       DoPageCompactionWithStateChange<kMode>(
2167           idx,
2168           to_space_end,
2169           to_space_end + from_space_slide_diff_,
2170           /*map_immediately=*/false,
2171           [&]() REQUIRES_SHARED(Locks::mutator_lock_) {
2172             UpdateNonMovingPage(
2173                 first_obj, to_space_end, from_space_slide_diff_, moving_space_bitmap_);
2174             if (kMode == kFallbackMode) {
2175               memcpy(to_space_end, to_space_end + from_space_slide_diff_, gPageSize);
2176             }
2177           });
2178     } else {
2179       // The page has no reachable object on it. Just declare it mapped.
2180       // Mutators shouldn't step on this page, which is asserted in the SIGBUS
2181       // handler.
2182       DCHECK_EQ(moving_pages_status_[idx].load(std::memory_order_relaxed),
2183                 static_cast<uint8_t>(PageState::kUnprocessed));
2184       moving_pages_status_[idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
2185                                       std::memory_order_release);
2186     }
2187     if (FreeFromSpacePages(idx, kMode, end_idx_for_mapping)) {
2188       end_idx_for_mapping = idx;
2189     }
2190   }
2191   // Map one last time to finish anything that is left.
2192   if (kMode == kCopyMode && end_idx_for_mapping > 0) {
2193     MapMovingSpacePages(idx,
2194                         end_idx_for_mapping,
2195                         /*from_fault=*/false,
2196                         /*return_on_contention=*/false,
2197                         /*tolerate_enoent=*/false);
2198   }
2199   DCHECK_EQ(to_space_end, bump_pointer_space_->Begin());
2200 }
2201 
2202 size_t MarkCompact::MapMovingSpacePages(size_t start_idx,
2203                                         size_t arr_len,
2204                                         bool from_fault,
2205                                         bool return_on_contention,
2206                                         bool tolerate_enoent) {
2207   DCHECK_LT(start_idx, arr_len);
2208   size_t arr_idx = start_idx;
2209   bool wait_for_unmapped = false;
2210   while (arr_idx < arr_len) {
2211     size_t map_count = 0;
2212     uint32_t cur_state = moving_pages_status_[arr_idx].load(std::memory_order_acquire);
2213     // Find a contiguous range that can be mapped with a single ioctl.
2214     for (uint32_t i = arr_idx, from_page = cur_state & ~kPageStateMask; i < arr_len;
2215          i++, map_count++, from_page += gPageSize) {
2216       uint32_t s = moving_pages_status_[i].load(std::memory_order_acquire);
2217       uint32_t cur_from_page = s & ~kPageStateMask;
2218       if (GetPageStateFromWord(s) != PageState::kProcessed || cur_from_page != from_page) {
2219         break;
2220       }
2221     }
2222 
2223     if (map_count == 0) {
2224       if (from_fault) {
2225         bool mapped = GetPageStateFromWord(cur_state) == PageState::kProcessedAndMapped;
2226         return mapped ? 1 : 0;
2227       }
2228       // Skip the pages that this thread cannot map.
2229       for (; arr_idx < arr_len; arr_idx++) {
2230         PageState s = GetMovingPageState(arr_idx);
2231         if (s == PageState::kProcessed) {
2232           break;
2233         } else if (s != PageState::kProcessedAndMapped) {
2234           wait_for_unmapped = true;
2235         }
2236       }
2237     } else {
2238       uint32_t from_space_offset = cur_state & ~kPageStateMask;
2239       uint8_t* to_space_start = moving_space_begin_ + arr_idx * gPageSize;
2240       uint8_t* from_space_start = from_space_begin_ + from_space_offset;
2241       DCHECK_ALIGNED_PARAM(to_space_start, gPageSize);
2242       DCHECK_ALIGNED_PARAM(from_space_start, gPageSize);
2243       size_t mapped_len = CopyIoctl(to_space_start,
2244                                     from_space_start,
2245                                     map_count * gPageSize,
2246                                     return_on_contention,
2247                                     tolerate_enoent);
2248       for (size_t l = 0; l < mapped_len; l += gPageSize, arr_idx++) {
2249         // A plain store is sufficient as every thread storing here stores the same value.
2250         moving_pages_status_[arr_idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
2251                                             std::memory_order_release);
2252       }
2253       if (from_fault) {
2254         return DivideByPageSize(mapped_len);
2255       }
2256       // The COPY ioctl can also return a smaller length if a page was found to be
2257       // already mapped. But that doesn't count as contention.
2258       if (return_on_contention && DivideByPageSize(mapped_len) < map_count && errno != EEXIST) {
2259         return arr_idx - start_idx;
2260       }
2261     }
2262   }
2263   if (wait_for_unmapped) {
2264     for (size_t i = start_idx; i < arr_len; i++) {
2265       PageState s = GetMovingPageState(i);
2266       DCHECK_GT(s, PageState::kProcessed);
2267       uint32_t backoff_count = 0;
2268       while (s != PageState::kProcessedAndMapped) {
2269         BackOff(backoff_count++);
2270         s = GetMovingPageState(i);
2271       }
2272     }
2273   }
2274   return arr_len - start_idx;
2275 }
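
// Illustration of the status word consumed by MapMovingSpacePages() above. This is
// a sketch inferred from the masking logic in this file, not an authoritative
// definition: each moving_pages_status_ entry packs the PageState into the bits
// covered by kPageStateMask and the page's from-space offset into the rest, roughly:
//
//   uint32_t word = moving_pages_status_[idx].load(std::memory_order_acquire);
//   PageState state = GetPageStateFromWord(word);   // state bits
//   uint32_t from_offset = word & ~kPageStateMask;  // offset of src data in from-space
//
// A run of pages is mappable with one COPY ioctl only if every page is kProcessed
// and the from-space offsets advance by gPageSize, which is what the inner loop checks.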
2276 
2277 void MarkCompact::UpdateNonMovingPage(mirror::Object* first,
2278                                       uint8_t* page,
2279                                       ptrdiff_t from_space_diff,
2280                                       accounting::ContinuousSpaceBitmap* bitmap) {
2281   DCHECK_LT(reinterpret_cast<uint8_t*>(first), page + gPageSize);
2282   // For every object found in the page, visit the previous object. This ensures
2283   // that we can visit without checking the page-end boundary.
2284   // Call VisitRefsForCompaction with the from-space read-barrier, as the klass-object
2285   // and super-class loads require it.
2286   // TODO: Set kVisitNativeRoots to false once we implement concurrent
2287   // compaction.
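  // Put differently (an illustrative restatement of the scheme above, not new
  // behavior): the bitmap walk below runs one object ahead; when 'next_obj' is
  // reported we visit 'curr_obj', which necessarily ends before 'next_obj' and hence
  // before the page end, so no end-boundary check is needed. The trailing object is
  // then handled after the walk with an explicit page-end offset.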
2288   mirror::Object* curr_obj = first;
2289   uint8_t* from_page = page + from_space_diff;
2290   uint8_t* from_page_end = from_page + gPageSize;
2291   bitmap->VisitMarkedRange(
2292       reinterpret_cast<uintptr_t>(first) + mirror::kObjectHeaderSize,
2293       reinterpret_cast<uintptr_t>(page + gPageSize),
2294       [&](mirror::Object* next_obj) {
2295         mirror::Object* from_obj = reinterpret_cast<mirror::Object*>(
2296             reinterpret_cast<uint8_t*>(curr_obj) + from_space_diff);
2297         if (reinterpret_cast<uint8_t*>(curr_obj) < page) {
2298           RefsUpdateVisitor</*kCheckBegin*/ true, /*kCheckEnd*/ false> visitor(
2299               this, from_obj, from_page, from_page_end);
2300           MemberOffset begin_offset(page - reinterpret_cast<uint8_t*>(curr_obj));
2301           // Native roots shouldn't be visited here, as they were already visited
2302           // when this object's beginning was processed in the preceding page.
2303           from_obj->VisitRefsForCompaction</*kFetchObjSize*/ false, /*kVisitNativeRoots*/ false>(
2304               visitor, begin_offset, MemberOffset(-1));
2305         } else {
2306           RefsUpdateVisitor</*kCheckBegin*/ false, /*kCheckEnd*/ false> visitor(
2307               this, from_obj, from_page, from_page_end);
2308           from_obj->VisitRefsForCompaction</*kFetchObjSize*/ false>(
2309               visitor, MemberOffset(0), MemberOffset(-1));
2310         }
2311         curr_obj = next_obj;
2312       });
2313 
2314   mirror::Object* from_obj =
2315       reinterpret_cast<mirror::Object*>(reinterpret_cast<uint8_t*>(curr_obj) + from_space_diff);
2316   MemberOffset end_offset(page + gPageSize - reinterpret_cast<uint8_t*>(curr_obj));
2317   if (reinterpret_cast<uint8_t*>(curr_obj) < page) {
2318     RefsUpdateVisitor</*kCheckBegin*/ true, /*kCheckEnd*/ true> visitor(
2319         this, from_obj, from_page, from_page_end);
2320     from_obj->VisitRefsForCompaction</*kFetchObjSize*/ false, /*kVisitNativeRoots*/ false>(
2321         visitor, MemberOffset(page - reinterpret_cast<uint8_t*>(curr_obj)), end_offset);
2322   } else {
2323     RefsUpdateVisitor</*kCheckBegin*/ false, /*kCheckEnd*/ true> visitor(
2324         this, from_obj, from_page, from_page_end);
2325     from_obj->VisitRefsForCompaction</*kFetchObjSize*/ false>(visitor, MemberOffset(0), end_offset);
2326   }
2327 }
2328 
2329 void MarkCompact::UpdateNonMovingSpace() {
2330   TimingLogger::ScopedTiming t("(Paused)UpdateNonMovingSpace", GetTimings());
2331   // Iterating in reverse ensures that the class pointer of objects which span
2332   // more than one page gets updated last. This is necessary for
2333   // VisitRefsForCompaction() to work correctly.
2334   // TODO: If and when we make non-moving space update concurrent, implement a
2335   // mechanism to remember class pointers for such objects off-heap and pass it
2336   // to VisitRefsForCompaction().
2337   uint8_t* page = non_moving_space_->Begin() + non_moving_first_objs_count_ * gPageSize;
2338   for (ssize_t i = non_moving_first_objs_count_ - 1; i >= 0; i--) {
2339     mirror::Object* obj = first_objs_non_moving_space_[i].AsMirrorPtr();
2340     page -= gPageSize;
2341     // null means the page has no objects whose references need to be updated.
2342     if (obj != nullptr) {
2343       UpdateNonMovingPage(obj, page, /*from_space_diff=*/0, non_moving_space_bitmap_);
2344     }
2345   }
2346 }
2347 
2348 void MarkCompact::UpdateMovingSpaceBlackAllocations() {
2349   // For sliding black pages, we need the first-object, which overlaps with the
2350   // first byte of the page. Additionally, we compute the size of the first chunk
2351   // of black objects. This will suffice for most black pages. Unlike compaction
2352   // pages, here we don't need to pre-compute the offset within the first-obj from
2353   // where sliding has to start, as that can be calculated from the pre-compact
2354   // address of the page. Therefore, to save space, we store the first chunk's
2355   // size in the black_alloc_pages_first_chunk_size_ array.
2356   // Pages may have holes after the first chunk, which can happen if a new TLAB
2357   // starts in the middle of the page; for those we mark the objects in the
2358   // mark-bitmap. So, if the first-chunk size is smaller than gPageSize,
2359   // then we use the mark-bitmap for the remainder of the page.
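  // Worked example (hypothetical numbers, assuming 4KiB pages): if a black page
  // begins with a 2KiB chunk of contiguous black objects and a new TLAB starts at
  // offset 3KiB, then black_alloc_pages_first_chunk_size_ records 2KiB,
  // first_objs_moving_space_ records the object overlapping the page's first byte,
  // and the objects from offset 3KiB onwards are found via the mark-bitmap when the
  // page is slid.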
2360   uint8_t* const begin = bump_pointer_space_->Begin();
2361   uint8_t* black_allocs = black_allocations_begin_;
2362   DCHECK_LE(begin, black_allocs);
2363   size_t consumed_blocks_count = 0;
2364   size_t first_block_size;
2365   // Needed only for a debug check at the end of the function. Hopefully the
2366   // compiler will eliminate it otherwise.
2367   size_t num_blocks = 0;
2368   // Get the list of all blocks allocated in the bump-pointer space.
2369   std::vector<size_t>* block_sizes = bump_pointer_space_->GetBlockSizes(thread_running_gc_,
2370                                                                         &first_block_size);
2371   DCHECK_LE(first_block_size, (size_t)(black_allocs - begin));
2372   if (block_sizes != nullptr) {
2373     size_t black_page_idx = moving_first_objs_count_;
2374     uint8_t* block_end = begin + first_block_size;
2375     uint32_t remaining_chunk_size = 0;
2376     uint32_t first_chunk_size = 0;
2377     mirror::Object* first_obj = nullptr;
2378     num_blocks = block_sizes->size();
2379     for (size_t block_size : *block_sizes) {
2380       block_end += block_size;
2381       // Skip the blocks that are prior to the black allocations. These will be
2382       // merged with the main-block later.
2383       if (black_allocs >= block_end) {
2384         consumed_blocks_count++;
2385         continue;
2386       }
2387       mirror::Object* obj = reinterpret_cast<mirror::Object*>(black_allocs);
2388       bool set_mark_bit = remaining_chunk_size > 0;
2389       // We don't know how many objects are allocated in the current block. When we hit
2390       // a null, assume it's the end. This works because every block is expected to
2391       // have objects allocated linearly using the bump-pointer.
2392       // BumpPointerSpace::Walk() also works similarly.
2393       while (black_allocs < block_end
2394              && obj->GetClass<kDefaultVerifyFlags, kWithoutReadBarrier>() != nullptr) {
2395         // Try to keep the instructions which access the class instance together to
2396         // avoid reloading the pointer from the object.
2397         size_t obj_size = obj->SizeOf();
2398         bytes_scanned_ += obj_size;
2399         obj_size = RoundUp(obj_size, kAlignment);
2400         UpdateClassAfterObjectMap(obj);
2401         if (first_obj == nullptr) {
2402           first_obj = obj;
2403         }
2404         // We only need the mark-bitmap in the pages wherein a new TLAB starts in
2405         // the middle of the page.
2406         if (set_mark_bit) {
2407           moving_space_bitmap_->Set(obj);
2408         }
2409         // Handle objects which cross page boundary, including objects larger
2410         // than page size.
2411         if (remaining_chunk_size + obj_size >= gPageSize) {
2412           set_mark_bit = false;
2413           first_chunk_size += gPageSize - remaining_chunk_size;
2414           remaining_chunk_size += obj_size;
2415           // We should not store first-object and remaining_chunk_size if there were
2416           // unused bytes before this TLAB, in which case we must have already
2417           // stored the values (below).
2418           if (black_alloc_pages_first_chunk_size_[black_page_idx] == 0) {
2419             black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size;
2420             first_objs_moving_space_[black_page_idx].Assign(first_obj);
2421           }
2422           black_page_idx++;
2423           remaining_chunk_size -= gPageSize;
2424           // Consume an object larger than page size.
2425           while (remaining_chunk_size >= gPageSize) {
2426             black_alloc_pages_first_chunk_size_[black_page_idx] = gPageSize;
2427             first_objs_moving_space_[black_page_idx].Assign(obj);
2428             black_page_idx++;
2429             remaining_chunk_size -= gPageSize;
2430           }
2431           first_obj = remaining_chunk_size > 0 ? obj : nullptr;
2432           first_chunk_size = remaining_chunk_size;
2433         } else {
2434           DCHECK_LE(first_chunk_size, remaining_chunk_size);
2435           first_chunk_size += obj_size;
2436           remaining_chunk_size += obj_size;
2437         }
2438         black_allocs += obj_size;
2439         obj = reinterpret_cast<mirror::Object*>(black_allocs);
2440       }
2441       DCHECK_LE(black_allocs, block_end);
2442       DCHECK_LT(remaining_chunk_size, gPageSize);
2443       // consume the unallocated portion of the block
2444       if (black_allocs < block_end) {
2445         // first-chunk of the current page ends here. Store it.
2446         if (first_chunk_size > 0 && black_alloc_pages_first_chunk_size_[black_page_idx] == 0) {
2447           black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size;
2448           first_objs_moving_space_[black_page_idx].Assign(first_obj);
2449         }
2450         first_chunk_size = 0;
2451         first_obj = nullptr;
2452         size_t page_remaining = gPageSize - remaining_chunk_size;
2453         size_t block_remaining = block_end - black_allocs;
2454         if (page_remaining <= block_remaining) {
2455           block_remaining -= page_remaining;
2456           // current page and the subsequent empty pages in the block
2457           black_page_idx += 1 + DivideByPageSize(block_remaining);
2458           remaining_chunk_size = ModuloPageSize(block_remaining);
2459         } else {
2460           remaining_chunk_size += block_remaining;
2461         }
2462         black_allocs = block_end;
2463       }
2464     }
2465     if (black_page_idx < DivideByPageSize(bump_pointer_space_->Size())) {
2466       // Store the leftover first-chunk, if any, and update page index.
2467       if (black_alloc_pages_first_chunk_size_[black_page_idx] > 0) {
2468         black_page_idx++;
2469       } else if (first_chunk_size > 0) {
2470         black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size;
2471         first_objs_moving_space_[black_page_idx].Assign(first_obj);
2472         black_page_idx++;
2473       }
2474     }
2475     black_page_count_ = black_page_idx - moving_first_objs_count_;
2476     delete block_sizes;
2477   }
2478   // Update bump-pointer space by consuming all the pre-black blocks into the
2479   // main one.
2480   bump_pointer_space_->SetBlockSizes(thread_running_gc_,
2481                                      post_compact_end_ - begin,
2482                                      consumed_blocks_count);
2483   if (kIsDebugBuild) {
2484     size_t moving_space_size = bump_pointer_space_->Size();
2485     size_t los_size = 0;
2486     if (heap_->GetLargeObjectsSpace()) {
2487       los_size = heap_->GetLargeObjectsSpace()->GetBytesAllocated();
2488     }
2489     // The moving-space size is already updated to post-compact size in SetBlockSizes above.
2490     // Also, bytes-allocated has already been adjusted with the large-object space's freed-bytes
2491     // in Sweep(), but not with moving-space freed-bytes.
2492     CHECK_GE(heap_->GetBytesAllocated() - black_objs_slide_diff_, moving_space_size + los_size)
2493         << " moving-space size:" << moving_space_size
2494         << " moving-space bytes-freed:" << black_objs_slide_diff_
2495         << " large-object-space size:" << los_size
2496         << " large-object-space bytes-freed:" << GetCurrentIteration()->GetFreedLargeObjectBytes()
2497         << " num-tlabs-merged:" << consumed_blocks_count
2498         << " main-block-size:" << (post_compact_end_ - begin)
2499         << " total-tlabs-moving-space:" << num_blocks;
2500   }
2501 }
2502 
2503 void MarkCompact::UpdateNonMovingSpaceBlackAllocations() {
2504   accounting::ObjectStack* stack = heap_->GetAllocationStack();
2505   const StackReference<mirror::Object>* limit = stack->End();
2506   uint8_t* const space_begin = non_moving_space_->Begin();
2507   for (StackReference<mirror::Object>* it = stack->Begin(); it != limit; ++it) {
2508     mirror::Object* obj = it->AsMirrorPtr();
2509     if (obj != nullptr && non_moving_space_bitmap_->HasAddress(obj)) {
2510       non_moving_space_bitmap_->Set(obj);
2511       // Clear so that we don't try to set the bit again in the next GC-cycle.
2512       it->Clear();
2513       size_t idx = DivideByPageSize(reinterpret_cast<uint8_t*>(obj) - space_begin);
2514       uint8_t* page_begin = AlignDown(reinterpret_cast<uint8_t*>(obj), gPageSize);
2515       mirror::Object* first_obj = first_objs_non_moving_space_[idx].AsMirrorPtr();
2516       if (first_obj == nullptr
2517           || (obj < first_obj && reinterpret_cast<uint8_t*>(first_obj) > page_begin)) {
2518         first_objs_non_moving_space_[idx].Assign(obj);
2519       }
2520       mirror::Object* next_page_first_obj = first_objs_non_moving_space_[++idx].AsMirrorPtr();
2521       uint8_t* next_page_begin = page_begin + gPageSize;
2522       if (next_page_first_obj == nullptr
2523           || reinterpret_cast<uint8_t*>(next_page_first_obj) > next_page_begin) {
2524         size_t obj_size = RoundUp(obj->SizeOf<kDefaultVerifyFlags>(), kAlignment);
2525         uint8_t* obj_end = reinterpret_cast<uint8_t*>(obj) + obj_size;
2526         while (next_page_begin < obj_end) {
2527           first_objs_non_moving_space_[idx++].Assign(obj);
2528           next_page_begin += gPageSize;
2529         }
2530       }
2531       // Update the first-objs count in case we went past non_moving_first_objs_count_.
2532       non_moving_first_objs_count_ = std::max(non_moving_first_objs_count_, idx);
2533     }
2534   }
2535 }
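
// In effect (a restatement of the loop above): after this pass,
// first_objs_non_moving_space_[i] holds, for every non-moving page i touched by
// black allocations, the object overlapping that page's first byte, or the earliest
// object on the page if none overlaps it. An object spanning several pages is
// recorded as the first-object of each page it covers, so UpdateNonMovingPage() can
// start its visit from it.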
2536 
2537 class MarkCompact::ImmuneSpaceUpdateObjVisitor {
2538  public:
2539   explicit ImmuneSpaceUpdateObjVisitor(MarkCompact* collector) : collector_(collector) {}
2540 
2541   void operator()(mirror::Object* obj) const ALWAYS_INLINE REQUIRES(Locks::mutator_lock_) {
2542     RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false> visitor(collector_,
2543                                                                         obj,
2544                                                                         /*begin_*/nullptr,
2545                                                                         /*end_*/nullptr);
2546     obj->VisitRefsForCompaction</*kFetchObjSize*/ false>(
2547         visitor, MemberOffset(0), MemberOffset(-1));
2548   }
2549 
2550   static void Callback(mirror::Object* obj, void* arg) REQUIRES(Locks::mutator_lock_) {
2551     reinterpret_cast<ImmuneSpaceUpdateObjVisitor*>(arg)->operator()(obj);
2552   }
2553 
2554  private:
2555   MarkCompact* const collector_;
2556 };
2557 
2558 class MarkCompact::ClassLoaderRootsUpdater : public ClassLoaderVisitor {
2559  public:
2560   explicit ClassLoaderRootsUpdater(MarkCompact* collector)
2561       : collector_(collector),
2562         moving_space_begin_(collector->black_dense_end_),
2563         moving_space_end_(collector->moving_space_end_) {}
2564 
2565   void Visit(ObjPtr<mirror::ClassLoader> class_loader) override
2566       REQUIRES_SHARED(Locks::classlinker_classes_lock_, Locks::mutator_lock_) {
2567     ClassTable* const class_table = class_loader->GetClassTable();
2568     if (class_table != nullptr) {
2569       // Classes are updated concurrently.
2570       class_table->VisitRoots(*this, /*skip_classes=*/true);
2571     }
2572   }
2573 
2574   void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const ALWAYS_INLINE
2575       REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
2576     if (!root->IsNull()) {
2577       VisitRoot(root);
2578     }
2579   }
2580 
2581   void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const ALWAYS_INLINE
2582       REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
2583     collector_->UpdateRoot(
2584         root, moving_space_begin_, moving_space_end_, RootInfo(RootType::kRootVMInternal));
2585   }
2586 
2587  private:
2588   MarkCompact* collector_;
2589   uint8_t* const moving_space_begin_;
2590   uint8_t* const moving_space_end_;
2591 };
2592 
2593 class MarkCompact::LinearAllocPageUpdater {
2594  public:
2595   explicit LinearAllocPageUpdater(MarkCompact* collector)
2596       : collector_(collector),
2597         moving_space_begin_(collector->black_dense_end_),
2598         moving_space_end_(collector->moving_space_end_),
2599         last_page_touched_(false) {}
2600 
2601   // Update a page in multi-object arena.
2602   void MultiObjectArena(uint8_t* page_begin, uint8_t* first_obj)
2603       REQUIRES_SHARED(Locks::mutator_lock_) {
2604     DCHECK(first_obj != nullptr);
2605     DCHECK_ALIGNED_PARAM(page_begin, gPageSize);
2606     uint8_t* page_end = page_begin + gPageSize;
2607     uint32_t obj_size;
2608     for (uint8_t* byte = first_obj; byte < page_end;) {
2609       TrackingHeader* header = reinterpret_cast<TrackingHeader*>(byte);
2610       obj_size = header->GetSize();
2611       if (UNLIKELY(obj_size == 0)) {
2612         // No more objects in this page to visit.
2613         last_page_touched_ = byte >= page_begin;
2614         return;
2615       }
2616       uint8_t* obj = byte + sizeof(TrackingHeader);
2617       uint8_t* obj_end = byte + obj_size;
2618       if (header->Is16Aligned()) {
2619         obj = AlignUp(obj, 16);
2620       }
2621       uint8_t* begin_boundary = std::max(obj, page_begin);
2622       uint8_t* end_boundary = std::min(obj_end, page_end);
2623       if (begin_boundary < end_boundary) {
2624         VisitObject(header->GetKind(), obj, begin_boundary, end_boundary);
2625       }
2626       if (ArenaAllocator::IsRunningOnMemoryTool()) {
2627         obj_size += ArenaAllocator::kMemoryToolRedZoneBytes;
2628       }
2629       byte += RoundUp(obj_size, LinearAlloc::kAlignment);
2630     }
2631     last_page_touched_ = true;
2632   }
2633 
2634   // This version is only used for cases where the entire page is filled with
2635   // GC-roots. For example, class-table and intern-table.
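  // Sketch of the loop below (illustrative; it uses only names already defined in
  // this file): every 32-bit word is a compressed GC-root whose least significant
  // bits may carry class-table flags, so the flags are stripped, the reference is
  // updated, and the flags are OR'ed back:
  //
  //   uint32_t lsbs = word & kMask;   // class-table flag bits
  //   word &= ~kMask;                 // compressed reference only
  //   VisitRootIfNonNull(reinterpret_cast<mirror::CompressedReference<mirror::Object>*>(&word));
  //   *root_ptr = word | lsbs;        // restore the flag bits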
2636   void SingleObjectArena(uint8_t* page_begin, size_t page_size)
2637       REQUIRES_SHARED(Locks::mutator_lock_) {
2638     static_assert(sizeof(uint32_t) == sizeof(GcRoot<mirror::Object>));
2639     DCHECK_ALIGNED(page_begin, kAlignment);
2640     // Least significant bits are used by class-table.
2641     static constexpr uint32_t kMask = kObjectAlignment - 1;
2642     size_t num_roots = page_size / sizeof(GcRoot<mirror::Object>);
2643     uint32_t* root_ptr = reinterpret_cast<uint32_t*>(page_begin);
2644     for (size_t i = 0; i < num_roots; root_ptr++, i++) {
2645       uint32_t word = *root_ptr;
2646       if (word != 0) {
2647         uint32_t lsbs = word & kMask;
2648         word &= ~kMask;
2649         VisitRootIfNonNull(reinterpret_cast<mirror::CompressedReference<mirror::Object>*>(&word));
2650         *root_ptr = word | lsbs;
2651         last_page_touched_ = true;
2652       }
2653     }
2654   }
2655 
2656   bool WasLastPageTouched() const { return last_page_touched_; }
2657 
2658   void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
2659       ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) {
2660     if (!root->IsNull()) {
2661       VisitRoot(root);
2662     }
2663   }
2664 
2665   void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
2666       ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) {
2667     mirror::Object* old_ref = root->AsMirrorPtr();
2668     DCHECK_NE(old_ref, nullptr);
2669     if (MarkCompact::HasAddress(old_ref, moving_space_begin_, moving_space_end_)) {
2670       mirror::Object* new_ref = old_ref;
2671       if (reinterpret_cast<uint8_t*>(old_ref) >= collector_->black_allocations_begin_) {
2672         new_ref = collector_->PostCompactBlackObjAddr(old_ref);
2673       } else if (collector_->live_words_bitmap_->Test(old_ref)) {
2674         DCHECK(collector_->moving_space_bitmap_->Test(old_ref))
2675             << "ref:" << old_ref << " root:" << root;
2676         new_ref = collector_->PostCompactOldObjAddr(old_ref);
2677       }
2678       if (old_ref != new_ref) {
2679         root->Assign(new_ref);
2680       }
2681     }
2682   }
2683 
2684  private:
2685   void VisitObject(LinearAllocKind kind,
2686                    void* obj,
2687                    uint8_t* start_boundary,
2688                    uint8_t* end_boundary) const ALWAYS_INLINE
2689       REQUIRES_SHARED(Locks::mutator_lock_) {
2690     switch (kind) {
2691       case LinearAllocKind::kNoGCRoots:
2692         break;
2693       case LinearAllocKind::kGCRootArray:
2694         {
2695           GcRoot<mirror::Object>* root = reinterpret_cast<GcRoot<mirror::Object>*>(start_boundary);
2696           GcRoot<mirror::Object>* last = reinterpret_cast<GcRoot<mirror::Object>*>(end_boundary);
2697           for (; root < last; root++) {
2698             VisitRootIfNonNull(root->AddressWithoutBarrier());
2699           }
2700         }
2701         break;
2702       case LinearAllocKind::kArtMethodArray:
2703         {
2704           LengthPrefixedArray<ArtMethod>* array = static_cast<LengthPrefixedArray<ArtMethod>*>(obj);
2705           // Old methods are clobbered in debug builds. Check size to confirm if the array
2706           // has any GC roots to visit. See ClassLinker::LinkMethodsHelper::ClobberOldMethods()
2707           if (array->size() > 0) {
2708             if (collector_->pointer_size_ == PointerSize::k64) {
2709               ArtMethod::VisitArrayRoots<PointerSize::k64>(
2710                   *this, start_boundary, end_boundary, array);
2711             } else {
2712               DCHECK_EQ(collector_->pointer_size_, PointerSize::k32);
2713               ArtMethod::VisitArrayRoots<PointerSize::k32>(
2714                   *this, start_boundary, end_boundary, array);
2715             }
2716           }
2717         }
2718         break;
2719       case LinearAllocKind::kArtMethod:
2720         ArtMethod::VisitRoots(*this, start_boundary, end_boundary, static_cast<ArtMethod*>(obj));
2721         break;
2722       case LinearAllocKind::kArtFieldArray:
2723         ArtField::VisitArrayRoots(*this,
2724                                   start_boundary,
2725                                   end_boundary,
2726                                   static_cast<LengthPrefixedArray<ArtField>*>(obj));
2727         break;
2728       case LinearAllocKind::kDexCacheArray:
2729         {
2730           mirror::DexCachePair<mirror::Object>* first =
2731               reinterpret_cast<mirror::DexCachePair<mirror::Object>*>(start_boundary);
2732           mirror::DexCachePair<mirror::Object>* last =
2733               reinterpret_cast<mirror::DexCachePair<mirror::Object>*>(end_boundary);
2734           mirror::DexCache::VisitDexCachePairRoots(*this, first, last);
2735       }
2736     }
2737   }
2738 
2739   MarkCompact* const collector_;
2740   // Cache to speed up checking if GC-root is in moving space or not.
2741   uint8_t* const moving_space_begin_;
2742   uint8_t* const moving_space_end_;
2743   // Whether the last page was touched or not.
2744   bool last_page_touched_ = false;
2745 };
2746 
2747 void MarkCompact::UpdateClassTableClasses(Runtime* runtime, bool immune_class_table_only) {
2748   // If the process is debuggable then redefinition is allowed, which may mean
2749   // pre-zygote-fork class-tables may have pointers to classes in the moving-space.
2750   // So visit classes from class-sets that are not in linear-alloc arena-pool.
2751   if (UNLIKELY(runtime->IsJavaDebuggableAtInit())) {
2752     ClassLinker* linker = runtime->GetClassLinker();
2753     ClassLoaderRootsUpdater updater(this);
2754     GcVisitedArenaPool* pool = static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool());
2755     auto cond = [this, pool, immune_class_table_only](ClassTable::ClassSet& set) -> bool {
2756       if (!set.empty()) {
2757         return immune_class_table_only ?
2758                immune_spaces_.ContainsObject(reinterpret_cast<mirror::Object*>(&*set.begin())) :
2759                !pool->Contains(reinterpret_cast<void*>(&*set.begin()));
2760       }
2761       return false;
2762     };
2763     linker->VisitClassTables([cond, &updater](ClassTable* table)
2764                                  REQUIRES_SHARED(Locks::mutator_lock_) {
2765                                table->VisitClassesIfConditionMet(cond, updater);
2766                              });
2767     ReaderMutexLock rmu(thread_running_gc_, *Locks::classlinker_classes_lock_);
2768     linker->GetBootClassTable()->VisitClassesIfConditionMet(cond, updater);
2769   }
2770 }
2771 
2772 void MarkCompact::CompactionPause() {
2773   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
2774   Runtime* runtime = Runtime::Current();
2775   non_moving_space_bitmap_ = non_moving_space_->GetLiveBitmap();
2776   if (kIsDebugBuild) {
2777     DCHECK_EQ(thread_running_gc_, Thread::Current());
2778     // TODO(Simulator): Test that this should not operate on the simulated stack when the simulator
2779     // supports mark compact.
2780     stack_low_addr_ = thread_running_gc_->GetStackEnd<kNativeStackType>();
2781     stack_high_addr_ = reinterpret_cast<char*>(stack_low_addr_)
2782                        + thread_running_gc_->GetUsableStackSize<kNativeStackType>();
2783   }
2784   {
2785     TimingLogger::ScopedTiming t2("(Paused)UpdateCompactionDataStructures", GetTimings());
2786     ReaderMutexLock rmu(thread_running_gc_, *Locks::heap_bitmap_lock_);
2787     // Refresh data-structures to catch-up on allocations that may have
2788     // happened since marking-phase pause.
2789     // There could be several TLABs that got allocated since marking pause. We
2790     // don't want to compact them and instead update the TLAB info in TLS and
2791     // let mutators continue to use the TLABs.
2792     // We need to set all the bits in live-words bitmap corresponding to allocated
2793     // objects. Also, we need to find the objects that are overlapping with
2794     // page-begin boundaries. Unlike objects allocated before
2795     // black_allocations_begin_, which can be identified via mark-bitmap, we can get
2796     // this info only via walking the space past black_allocations_begin_, which
2797     // involves fetching object size.
2798     // TODO: We can reduce the time spent on this in a pause by performing one
2799     // round of this concurrently prior to the pause.
2800     UpdateMovingSpaceBlackAllocations();
2801     // Iterate over the allocation_stack_, for every object in the non-moving
2802     // space:
2803     // 1. Mark the object in live bitmap
2804     // 2. Erase the object from allocation stack
2805     // 3. In the corresponding page, if the first-object vector needs updating
2806     // then do so.
2807     UpdateNonMovingSpaceBlackAllocations();
2808     // This store is visible to mutator (or uffd worker threads) as the mutator
2809     // lock's unlock guarantees that.
2810     compacting_ = true;
2811     // Start updating roots and system weaks now.
2812     heap_->GetReferenceProcessor()->UpdateRoots(this);
2813   }
2814   {
2815     // TODO: Immune-space updating has to happen either before or after
2816     // remapping pre-compact pages to from-space. And depending on when it's
2817     // done, we have to invoke VisitRefsForCompaction() with or without
2818     // read-barrier.
2819     TimingLogger::ScopedTiming t2("(Paused)UpdateImmuneSpaces", GetTimings());
2820     accounting::CardTable* const card_table = heap_->GetCardTable();
2821     for (auto& space : immune_spaces_.GetSpaces()) {
2822       DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
2823       accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
2824       accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
2825       // Having zygote-space indicates that the first zygote fork has taken
2826       // place and that the classes/dex-caches in immune-spaces may have allocations
2827       // (ArtMethod/ArtField arrays, dex-cache array, etc.) in the
2828       // non-userfaultfd visited private-anonymous mappings. Visit them here.
2829       ImmuneSpaceUpdateObjVisitor visitor(this);
2830       if (table != nullptr) {
2831         table->ProcessCards();
2832         table->VisitObjects(ImmuneSpaceUpdateObjVisitor::Callback, &visitor);
2833       } else {
2834         WriterMutexLock wmu(thread_running_gc_, *Locks::heap_bitmap_lock_);
2835         card_table->Scan<false>(
2836             live_bitmap,
2837             space->Begin(),
2838             space->Limit(),
2839             visitor,
2840             accounting::CardTable::kCardDirty - 1);
2841       }
2842     }
2843   }
2844 
2845   {
2846     TimingLogger::ScopedTiming t2("(Paused)UpdateRoots", GetTimings());
2847     runtime->VisitConcurrentRoots(this, kVisitRootFlagAllRoots);
2848     runtime->VisitNonThreadRoots(this);
2849     {
2850       ClassLinker* linker = runtime->GetClassLinker();
2851       ClassLoaderRootsUpdater updater(this);
2852       ReaderMutexLock rmu(thread_running_gc_, *Locks::classlinker_classes_lock_);
2853       linker->VisitClassLoaders(&updater);
2854       linker->GetBootClassTable()->VisitRoots(updater, /*skip_classes=*/true);
2855     }
2856     SweepSystemWeaks(thread_running_gc_, runtime, /*paused=*/true);
2857 
2858     bool has_zygote_space = heap_->HasZygoteSpace();
2859     GcVisitedArenaPool* arena_pool =
2860         static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool());
2861     // Update immune/pre-zygote class-tables in case class redefinition took
2862     // place. Pre-zygote class-tables that are not in immune spaces are updated
2863     // below if we are in fallback-mode or if there is no zygote space. So in
2864     // that case only visit class-tables that are in immune-spaces.
2865     UpdateClassTableClasses(runtime, uffd_ == kFallbackMode || !has_zygote_space);
2866 
2867     // Acquire arena-pool's lock, which should be released after the pool is
2868     // userfaultfd registered. This is to ensure that no new arenas are
2869     // allocated and used in between. Since they will not be captured in
2870     // linear_alloc_arenas_ below, we will miss updating their pages. The same
2871     // reason also applies to new allocations within the existing arena which
2872     // may change last_byte.
2873     // Since we are in a STW pause, this shouldn't happen anyway, but holding
2874     // the lock confirms it.
2875     // TODO (b/305779657): Replace with ExclusiveTryLock() and assert that it
2876     // doesn't fail once it is available for ReaderWriterMutex.
2877     WriterMutexLock pool_wmu(thread_running_gc_, arena_pool->GetLock());
2878 
2879     // TODO: Find out why it's not sufficient to visit native roots of immune
2880     // spaces, and why all the pre-zygote fork arenas have to be linearly updated.
2881     // Is it possible that some native root starts getting pointed to by some object
2882     // in moving space after fork? Or are we missing a write-barrier somewhere
2883     // when a native root is updated?
2884     auto arena_visitor = [this](uint8_t* page_begin, uint8_t* first_obj, size_t page_size)
2885                              REQUIRES_SHARED(Locks::mutator_lock_) {
2886                            LinearAllocPageUpdater updater(this);
2887                            if (first_obj != nullptr) {
2888                              updater.MultiObjectArena(page_begin, first_obj);
2889                            } else {
2890                              updater.SingleObjectArena(page_begin, page_size);
2891                            }
2892                          };
2893     if (uffd_ == kFallbackMode || (!has_zygote_space && runtime->IsZygote())) {
2894       // Besides fallback-mode, visit linear-alloc space in the pause for zygote
2895       // processes prior to first fork (that's when zygote space gets created).
2896       if (kIsDebugBuild && IsValidFd(uffd_)) {
2897         // All arenas allocated so far are expected to be pre-zygote fork.
2898         arena_pool->ForEachAllocatedArena(
2899             [](const TrackedArena& arena)
2900                 REQUIRES_SHARED(Locks::mutator_lock_) { CHECK(arena.IsPreZygoteForkArena()); });
2901       }
2902       arena_pool->VisitRoots(arena_visitor);
2903     } else {
2904       // Inform the arena-pool that compaction is going on. So the TrackedArena
2905       // objects corresponding to the arenas that are freed shouldn't be deleted
2906       // immediately. We will do that in FinishPhase(). This is to avoid ABA
2907       // problem.
2908       arena_pool->DeferArenaFreeing();
2909       arena_pool->ForEachAllocatedArena(
2910           [this, arena_visitor, has_zygote_space](const TrackedArena& arena)
2911               REQUIRES_SHARED(Locks::mutator_lock_) {
2912             // The pre-zygote fork arenas are not visited concurrently in the
2913             // zygote children processes. The native roots of the dirty objects
2914             // are visited during immune space visit below.
2915             if (!arena.IsPreZygoteForkArena()) {
2916               uint8_t* last_byte = arena.GetLastUsedByte();
2917               auto ret = linear_alloc_arenas_.insert({&arena, last_byte});
2918               CHECK(ret.second);
2919             } else if (!arena.IsSingleObjectArena() || !has_zygote_space) {
2920               // Pre-zygote class-table and intern-table don't need to be updated.
2921               // TODO: Explore the possibility of using /proc/self/pagemap to
2922               // fetch which pages in these arenas are private-dirty and then only
2923               // visit those pages. To optimize it further, we can keep all
2924               // pre-zygote arenas in a single memory range so that just one read
2925               // from pagemap is sufficient.
2926               arena.VisitRoots(arena_visitor);
2927             }
2928           });
2929     }
2930     // Release order wrt to mutator threads' SIGBUS handler load.
2931     // Release order wrt the mutator threads' SIGBUS handler load.
2932     sigbus_in_progress_count_[1].store(0, std::memory_order_release);
2933     KernelPreparation();
2934   }
2935 
2936   UpdateNonMovingSpace();
2937   // fallback mode
2938   if (uffd_ == kFallbackMode) {
2939     CompactMovingSpace<kFallbackMode>(nullptr);
2940 
2941     int32_t freed_bytes = black_objs_slide_diff_;
2942     bump_pointer_space_->RecordFree(freed_objects_, freed_bytes);
2943     RecordFree(ObjectBytePair(freed_objects_, freed_bytes));
2944   } else {
2945     DCHECK_EQ(compaction_buffer_counter_.load(std::memory_order_relaxed), 1);
2946   }
2947   stack_low_addr_ = nullptr;
2948 }
2949 
2950 void MarkCompact::KernelPrepareRangeForUffd(uint8_t* to_addr, uint8_t* from_addr, size_t map_size) {
2951   int mremap_flags = MREMAP_MAYMOVE | MREMAP_FIXED;
2952   if (gHaveMremapDontunmap) {
2953     mremap_flags |= MREMAP_DONTUNMAP;
2954   }
2955 
2956   void* ret = mremap(to_addr, map_size, map_size, mremap_flags, from_addr);
2957   CHECK_EQ(ret, static_cast<void*>(from_addr))
2958       << "mremap to move pages failed: " << strerror(errno)
2959       << ". space-addr=" << reinterpret_cast<void*>(to_addr) << " size=" << PrettySize(map_size);
2960 
2961   if (!gHaveMremapDontunmap) {
2962     // Without MREMAP_DONTUNMAP the source mapping is unmapped by mremap. So mmap
2963     // the moving space again.
2964     int mmap_flags = MAP_FIXED;
2965     // Use MAP_FIXED_NOREPLACE so that if someone else reserves the 'to_addr'
2966     // mapping in the meantime (which can happen when MREMAP_DONTUNMAP isn't
2967     // available), we avoid unmapping someone else's mapping and causing
2968     // crashes elsewhere.
2969     mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE;
2970     ret = mmap(to_addr, map_size, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
2971     CHECK_EQ(ret, static_cast<void*>(to_addr))
2972         << "mmap for moving space failed: " << strerror(errno);
2973   }
2974 }
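
// Net effect of the above (a descriptive note, not additional logic): with
// MREMAP_DONTUNMAP, mremap(to_addr, ..., from_addr) moves the physical pages to
// 'from_addr' while leaving an empty anonymous mapping at 'to_addr', so subsequent
// accesses to 'to_addr' fault, which is exactly the state the userfaultfd-based
// compaction needs. Without the flag, the source range gets unmapped and has to be
// re-created with the mmap() above.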
2975 
2976 void MarkCompact::KernelPreparation() {
2977   TimingLogger::ScopedTiming t("(Paused)KernelPreparation", GetTimings());
2978   uint8_t* moving_space_begin = bump_pointer_space_->Begin();
2979   size_t moving_space_size = bump_pointer_space_->Capacity();
2980   size_t moving_space_register_sz = (moving_first_objs_count_ + black_page_count_) * gPageSize;
2981   DCHECK_LE(moving_space_register_sz, moving_space_size);
2982 
2983   KernelPrepareRangeForUffd(moving_space_begin, from_space_begin_, moving_space_size);
2984 
2985   if (IsValidFd(uffd_)) {
2986     if (moving_space_register_sz > 0) {
2987       // mremap clears the 'anon_vma' field of anonymous mappings. If we
2988       // uffd-register only the used portion of the space, then the vma gets
2989       // split (between used and unused portions) and as soon as pages are
2990       // mapped to the vmas, they get different `anon_vma` assigned, which
2991       // ensures that the two vmas cannot merge after we uffd-unregister the
2992       // used portion. OTOH, registering the entire space avoids the split, but
2993       // unnecessarily causes userfaults on allocations.
2994       // By faulting-in a page we force the kernel to allocate 'anon_vma' *before*
2995       // the vma-split in uffd-register. This ensures that when we unregister
2996       // the used portion after compaction, the two split vmas merge. This is
2997       // necessary for the mremap of the next GC cycle to not fail due to having
2998       // more than one vma in the source range.
2999       //
3000       // Fault in an address aligned to the PMD size so that, in case THP is enabled,
3001       // we don't mistakenly fault in a page in the beginning portion that will be
3002       // registered with uffd. If the alignment takes us beyond the space, then
3003       // fault the first page and madvise it.
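      // Example with hypothetical sizes (4KiB pages, 2MiB PMD): if the portion to be
      // registered ends at begin + 6MiB + 12KiB, the byte at begin + 8MiB is touched,
      // so any transparent huge page allocated by that fault lies entirely outside
      // the registered range; if begin + 8MiB falls beyond the space, the first page
      // is touched instead and the first PMD is immediately MADV_DONTNEED'ed.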
3004       size_t pmd_size = Heap::GetPMDSize();
3005       uint8_t* fault_in_addr = AlignUp(moving_space_begin + moving_space_register_sz, pmd_size);
3006       if (bump_pointer_space_->Contains(reinterpret_cast<mirror::Object*>(fault_in_addr))) {
3007         *const_cast<volatile uint8_t*>(fault_in_addr) = 0;
3008       } else {
3009         DCHECK_ALIGNED_PARAM(moving_space_begin, gPageSize);
3010         *const_cast<volatile uint8_t*>(moving_space_begin) = 0;
3011         madvise(moving_space_begin, pmd_size, MADV_DONTNEED);
3012       }
3013       // Register the moving space with userfaultfd.
3014       RegisterUffd(moving_space_begin, moving_space_register_sz);
3015       // madvise ensures that if any page gets mapped (only possible if some
3016       // thread is reading the page(s) without trying to make sense of them, as we
3017       // hold the mutator-lock exclusively) between mremap and uffd-registration,
3018       // then it gets zapped so that the map is empty and ready for userfaults. If
3019       // we could mremap after uffd-registration (like in the case of the linear-alloc
3020       // spaces below) then we wouldn't need it. But since we don't register the
3021       // entire space, we can't do that.
3022       madvise(moving_space_begin, moving_space_register_sz, MADV_DONTNEED);
3023     }
3024     // Prepare linear-alloc for concurrent compaction.
3025     for (auto& data : linear_alloc_spaces_data_) {
3026       DCHECK_EQ(static_cast<ssize_t>(data.shadow_.Size()), data.end_ - data.begin_);
3027       // There could be threads running in suspended mode when the compaction
3028       // pause is being executed. In order to make the userfaultfd setup atomic,
3029       // the registration has to be done *before* moving the pages to shadow map.
3030       RegisterUffd(data.begin_, data.shadow_.Size());
3031       KernelPrepareRangeForUffd(data.begin_, data.shadow_.Begin(), data.shadow_.Size());
3032     }
3033   }
3034 }
3035 
3036 bool MarkCompact::SigbusHandler(siginfo_t* info) {
3037   class ScopedInProgressCount {
3038    public:
3039     explicit ScopedInProgressCount(MarkCompact* collector) : collector_(collector) {
3040       // Increment the count only if compaction is not done yet.
3041       for (idx_ = 0; idx_ < 2; idx_++) {
3042         SigbusCounterType prev =
3043             collector_->sigbus_in_progress_count_[idx_].load(std::memory_order_relaxed);
3044         while ((prev & kSigbusCounterCompactionDoneMask) == 0) {
3045           if (collector_->sigbus_in_progress_count_[idx_].compare_exchange_strong(
3046                   prev, prev + 1, std::memory_order_acquire)) {
3047             DCHECK_LT(prev, kSigbusCounterCompactionDoneMask - 1);
3048             return;
3049           }
3050         }
3051       }
3052     }
3053 
3054     bool TolerateEnoent() const { return idx_ == 1; }
3055 
3056     bool IsCompactionDone() const { return idx_ == 2; }
3057 
3058     ~ScopedInProgressCount() {
3059       if (idx_ < 2) {
3060         collector_->sigbus_in_progress_count_[idx_].fetch_sub(1, std::memory_order_release);
3061       }
3062     }
3063 
3064    private:
3065     MarkCompact* const collector_;
3066     uint8_t idx_;
3067   };
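
  // Note on idx_ (as the checks above and below suggest): idx_ == 0 means the first
  // counter accepted the increment, i.e. moving-space compaction is still in
  // progress; idx_ == 1 means only the second counter accepted it, so the moving
  // space may already be uffd-unregistered and ENOENT from its ioctls must be
  // tolerated; idx_ == 2 means both counters are marked done and the fault can only
  // be a late, spurious one.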
3068 
3069   if (info->si_code != BUS_ADRERR) {
3070     // Userfaultfd raises SIGBUS with BUS_ADRERR. All other causes can't be
3071     // handled here.
3072     return false;
3073   }
3074 
3075   ScopedInProgressCount spc(this);
3076   uint8_t* fault_page = AlignDown(reinterpret_cast<uint8_t*>(info->si_addr), gPageSize);
3077   if (!spc.IsCompactionDone()) {
3078     if (HasAddress(reinterpret_cast<mirror::Object*>(fault_page))) {
3079       Thread* self = Thread::Current();
3080       Locks::mutator_lock_->AssertSharedHeld(self);
3081       size_t nr_moving_space_used_pages = moving_first_objs_count_ + black_page_count_;
3082       ConcurrentlyProcessMovingPage(fault_page,
3083                                     self->GetThreadLocalGcBuffer(),
3084                                     nr_moving_space_used_pages,
3085                                     spc.TolerateEnoent());
3086       return true;
3087     } else {
3088       // Find the linear-alloc space containing fault-addr
3089       for (auto& data : linear_alloc_spaces_data_) {
3090         if (data.begin_ <= fault_page && data.end_ > fault_page) {
3091           ConcurrentlyProcessLinearAllocPage(fault_page, spc.TolerateEnoent());
3092           return true;
3093         }
3094       }
3095       // Fault address doesn't belong to either moving-space or linear-alloc.
3096       return false;
3097     }
3098   } else {
3099     // We may spuriously get a SIGBUS fault that was initiated before the
3100     // compaction finished but ends up here. In that case, if the fault
3101     // address is valid then consider it handled.
3102     return HasAddress(reinterpret_cast<mirror::Object*>(fault_page)) ||
3103            linear_alloc_spaces_data_.end() !=
3104                std::find_if(linear_alloc_spaces_data_.begin(),
3105                             linear_alloc_spaces_data_.end(),
3106                             [fault_page](const LinearAllocSpaceData& data) {
3107                               return data.begin_ <= fault_page && data.end_ > fault_page;
3108                             });
3109   }
3110 }
3111 
3112 void MarkCompact::ConcurrentlyProcessMovingPage(uint8_t* fault_page,
3113                                                 uint8_t* buf,
3114                                                 size_t nr_moving_space_used_pages,
3115                                                 bool tolerate_enoent) {
3116   Thread* self = Thread::Current();
3117   uint8_t* unused_space_begin =
3118       bump_pointer_space_->Begin() + nr_moving_space_used_pages * gPageSize;
3119   DCHECK(IsAlignedParam(unused_space_begin, gPageSize));
3120   if (fault_page >= unused_space_begin) {
3121     // There is a race which allows more than one thread to install a
3122     // zero-page. But we can tolerate that. So absorb the EEXIST returned by
3123     // the ioctl and move on.
3124     ZeropageIoctl(fault_page, gPageSize, /*tolerate_eexist=*/true, tolerate_enoent);
3125     return;
3126   }
3127   size_t page_idx = DivideByPageSize(fault_page - bump_pointer_space_->Begin());
3128   DCHECK_LT(page_idx, moving_first_objs_count_ + black_page_count_);
3129   mirror::Object* first_obj = first_objs_moving_space_[page_idx].AsMirrorPtr();
3130   if (first_obj == nullptr) {
3131     DCHECK_GT(fault_page, post_compact_end_);
3132     // Install zero-pages in the entire remaining TLAB to avoid multiple ioctl invocations.
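    // For example, if the fault lands inside this thread's TLAB and the TLAB's
    // remaining pages are all still kUnprocessed, the loop below computes a single
    // length covering them and one ZeropageIoctl() call maps the whole run instead
    // of one ioctl per faulting page.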
3133     uint8_t* end = AlignDown(self->GetTlabEnd(), gPageSize);
3134     if (fault_page < self->GetTlabStart() || fault_page >= end) {
3135       end = fault_page + gPageSize;
3136     }
3137     size_t end_idx = page_idx + DivideByPageSize(end - fault_page);
3138     size_t length = 0;
3139     for (size_t idx = page_idx; idx < end_idx; idx++, length += gPageSize) {
3140       uint32_t cur_state = moving_pages_status_[idx].load(std::memory_order_acquire);
3141       if (cur_state != static_cast<uint8_t>(PageState::kUnprocessed)) {
3142         DCHECK_EQ(cur_state, static_cast<uint8_t>(PageState::kProcessedAndMapped));
3143         break;
3144       }
3145     }
3146     if (length > 0) {
3147       length = ZeropageIoctl(fault_page, length, /*tolerate_eexist=*/true, tolerate_enoent);
3148       for (size_t len = 0, idx = page_idx; len < length; idx++, len += gPageSize) {
3149         moving_pages_status_[idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
3150                                         std::memory_order_release);
3151       }
3152     }
3153     return;
3154   }
3155 
3156   uint32_t raw_state = moving_pages_status_[page_idx].load(std::memory_order_acquire);
3157   uint32_t backoff_count = 0;
3158   PageState state;
3159   while (true) {
3160     state = GetPageStateFromWord(raw_state);
3161     if (state == PageState::kProcessing || state == PageState::kMutatorProcessing ||
3162         state == PageState::kProcessingAndMapping || state == PageState::kProcessedAndMapping) {
3163       // Wait for the page to be mapped (by gc-thread or some mutator) before returning.
3164       // The wait is not expected to be long as the read state indicates that the other
3165       // thread is actively working on the page.
3166       BackOff(backoff_count++);
3167       raw_state = moving_pages_status_[page_idx].load(std::memory_order_acquire);
3168     } else if (state == PageState::kProcessedAndMapped) {
3169       // Nothing to do.
3170       break;
3171     } else {
3172       // Acquire order to ensure we don't start writing to a page, which could
3173       // be shared, before the CAS is successful.
3174       if (state == PageState::kUnprocessed &&
3175           moving_pages_status_[page_idx].compare_exchange_strong(
3176               raw_state,
3177               static_cast<uint8_t>(PageState::kMutatorProcessing),
3178               std::memory_order_acquire)) {
3179         if (fault_page < black_dense_end_) {
3180           UpdateNonMovingPage(first_obj, fault_page, from_space_slide_diff_, moving_space_bitmap_);
3181           buf = fault_page + from_space_slide_diff_;
3182         } else {
3183           if (UNLIKELY(buf == nullptr)) {
3184             uint16_t idx = compaction_buffer_counter_.fetch_add(1, std::memory_order_relaxed);
3185             // The buffer-map is one page bigger as the first buffer is used by GC-thread.
3186             CHECK_LE(idx, kMutatorCompactionBufferCount);
3187             buf = compaction_buffers_map_.Begin() + idx * gPageSize;
3188             DCHECK(compaction_buffers_map_.HasAddress(buf));
3189             self->SetThreadLocalGcBuffer(buf);
3190           }
3191 
3192           if (fault_page < post_compact_end_) {
3193             // The page has to be compacted.
3194             CompactPage(first_obj,
3195                         pre_compact_offset_moving_space_[page_idx],
3196                         buf,
3197                         /*needs_memset_zero=*/true);
3198           } else {
3199             DCHECK_NE(first_obj, nullptr);
3200             DCHECK_GT(pre_compact_offset_moving_space_[page_idx], 0u);
3201             uint8_t* pre_compact_page = black_allocations_begin_ + (fault_page - post_compact_end_);
3202             uint32_t first_chunk_size = black_alloc_pages_first_chunk_size_[page_idx];
3203             mirror::Object* next_page_first_obj = nullptr;
3204             if (page_idx + 1 < moving_first_objs_count_ + black_page_count_) {
3205               next_page_first_obj = first_objs_moving_space_[page_idx + 1].AsMirrorPtr();
3206             }
3207             DCHECK(IsAlignedParam(pre_compact_page, gPageSize));
3208             SlideBlackPage(first_obj,
3209                            next_page_first_obj,
3210                            first_chunk_size,
3211                            pre_compact_page,
3212                            buf,
3213                            /*needs_memset_zero=*/true);
3214           }
3215         }
3216         // Nobody else would simultaneously modify this page's state so an
3217         // atomic store is sufficient. Use 'release' order to guarantee that
3218         // loads/stores to the page are finished before this store. Since the
3219         // mutator used its own buffer for the processing, there is no reason to
3220         // put its index in the status of the page. Also, the mutator is going
3221         // to immediately map the page, so that info is not needed.
3222         moving_pages_status_[page_idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapping),
3223                                              std::memory_order_release);
3224         CopyIoctl(fault_page, buf, gPageSize, /*return_on_contention=*/false, tolerate_enoent);
3225         // Store is sufficient as no other thread modifies the status at this stage.
3226         moving_pages_status_[page_idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
3227                                              std::memory_order_release);
3228         break;
3229       }
3230       state = GetPageStateFromWord(raw_state);
3231       if (state == PageState::kProcessed) {
3232         size_t arr_len = moving_first_objs_count_ + black_page_count_;
3233         // The page is processed but not mapped. We should map it. The release
3234         // order used in MapMovingSpacePages will ensure that the increment to
3235         // moving_compaction_in_progress is done first.
3236         if (MapMovingSpacePages(page_idx,
3237                                 arr_len,
3238                                 /*from_fault=*/true,
3239                                 /*return_on_contention=*/false,
3240                                 tolerate_enoent) > 0) {
3241           break;
3242         }
3243         raw_state = moving_pages_status_[page_idx].load(std::memory_order_acquire);
3244       }
3245     }
3246   }
3247 }
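
// Summary of the page-state transitions driven by the fault path above (restating
// the code in this function, not a separate specification): a faulting mutator
// moves a page from kUnprocessed to kMutatorProcessing via CAS, compacts or slides
// it into a private buffer, publishes kProcessedAndMapping, maps it with
// CopyIoctl(), and finally stores kProcessedAndMapped. A page already in kProcessed
// (compacted by the GC thread but not yet mapped) is mapped via
// MapMovingSpacePages(), and a page in any intermediate *Processing/*Mapping state
// is simply waited on with BackOff().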
3248 
3249 bool MarkCompact::MapUpdatedLinearAllocPages(uint8_t* start_page,
3250                                              uint8_t* start_shadow_page,
3251                                              Atomic<PageState>* state,
3252                                              size_t length,
3253                                              bool free_pages,
3254                                              bool single_ioctl,
3255                                              bool tolerate_enoent) {
3256   DCHECK_ALIGNED_PARAM(length, gPageSize);
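       // Remember the original range so that, after mapping, the corresponding shadow
       // pages can be released. 'check_state_for_madv' records whether any page in the
       // range was concurrently claimed by another thread, in which case we must wait
       // for it to reach the 'mapped' state before the shadow pages are madvised away.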
3257   Atomic<PageState>* madv_state = state;
3258   size_t madv_len = length;
3259   uint8_t* madv_start = start_shadow_page;
3260   bool check_state_for_madv = false;
3261   uint8_t* end_page = start_page + length;
3262   while (start_page < end_page) {
3263     size_t map_len = 0;
3264     // Find a contiguous range of pages that we can map in single ioctl.
3265     for (Atomic<PageState>* cur_state = state;
3266          map_len < length && cur_state->load(std::memory_order_acquire) == PageState::kProcessed;
3267          map_len += gPageSize, cur_state++) {
3268       // No body.
3269     }
3270 
3271     if (map_len == 0) {
3272       if (single_ioctl) {
3273         return state->load(std::memory_order_relaxed) == PageState::kProcessedAndMapped;
3274       }
3275       // Skip all the pages that this thread can't map.
3276       while (length > 0) {
3277         PageState s = state->load(std::memory_order_relaxed);
3278         if (s == PageState::kProcessed) {
3279           break;
3280         }
3281         // If we find any page which is being processed or mapped (only possible by mutators),
3282         // then we need to re-check the page state and, if needed, wait for it to change to
3283         // 'mapped' before the shadow pages are reclaimed.
3284         check_state_for_madv |= s > PageState::kUnprocessed && s < PageState::kProcessedAndMapped;
3285         state++;
3286         length -= gPageSize;
3287         start_shadow_page += gPageSize;
3288         start_page += gPageSize;
3289       }
3290     } else {
3291       map_len = CopyIoctl(start_page,
3292                           start_shadow_page,
3293                           map_len,
3294                           /*return_on_contention=*/false,
3295                           tolerate_enoent);
3296       DCHECK_NE(map_len, 0u);
3297       // Declare that the pages are ready to be accessed. Store is sufficient
3298       // as any thread will be storing the same value.
3299       for (size_t l = 0; l < map_len; l += gPageSize, state++) {
3300         PageState s = state->load(std::memory_order_relaxed);
3301         DCHECK(s == PageState::kProcessed || s == PageState::kProcessedAndMapped) << "state:" << s;
3302         state->store(PageState::kProcessedAndMapped, std::memory_order_release);
3303       }
3304       if (single_ioctl) {
3305         break;
3306       }
3307       start_page += map_len;
3308       start_shadow_page += map_len;
3309       length -= map_len;
3310       // state is already updated above.
3311     }
3312   }
3313   if (free_pages) {
3314     if (check_state_for_madv) {
3315       // Wait until all the pages are mapped before releasing them. This check is needed
3316       // only if some mutators were found to be concurrently mapping pages earlier.
3317       for (size_t l = 0; l < madv_len; l += gPageSize, madv_state++) {
3318         uint32_t backoff_count = 0;
3319         PageState s = madv_state->load(std::memory_order_relaxed);
3320         while (s > PageState::kUnprocessed && s < PageState::kProcessedAndMapped) {
3321           BackOff(backoff_count++);
3322           s = madv_state->load(std::memory_order_relaxed);
3323         }
3324       }
3325     }
3326     ZeroAndReleaseMemory(madv_start, madv_len);
3327   }
3328   return true;
3329 }
3330 
3331 void MarkCompact::ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool tolerate_enoent) {
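       // Locate the arena that could contain 'fault_page': upper_bound() returns the
       // first arena beginning strictly after the fault address, so the candidate
       // arena, if any, is its predecessor in the sorted set.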
3332   auto arena_iter = linear_alloc_arenas_.end();
3333   {
3334     TrackedArena temp_arena(fault_page);
3335     arena_iter = linear_alloc_arenas_.upper_bound(&temp_arena);
3336     arena_iter = arena_iter != linear_alloc_arenas_.begin() ? std::prev(arena_iter)
3337                                                             : linear_alloc_arenas_.end();
3338   }
3339   // Unlike ProcessLinearAlloc(), we don't need to hold the arena-pool's lock here
3340   // because the very fact that a thread accessed the page, thereby causing this
3341   // userfault, confirms that nobody can delete the corresponding arena and
3342   // release its pages.
3343   // NOTE: A memory range may be recycled several times during a compaction
3344   // cycle, potentially causing userfaults on the same page several times.
3345   // That's not a problem, as all of them (except possibly the first one) only
3346   // require mapping a zero-page, which we do without updating the
3347   // 'state_arr'.
3348   if (arena_iter == linear_alloc_arenas_.end() ||
3349       arena_iter->first->IsWaitingForDeletion() ||
3350       arena_iter->second <= fault_page) {
3351     // Fault page isn't in any of the arenas that existed before we started
3352     // compaction. So map zeropage and return.
3353     ZeropageIoctl(fault_page, gPageSize, /*tolerate_eexist=*/true, tolerate_enoent);
3354   } else {
3355     // Find the linear-alloc space containing fault-page
3356     LinearAllocSpaceData* space_data = nullptr;
3357     for (auto& data : linear_alloc_spaces_data_) {
3358       if (data.begin_ <= fault_page && fault_page < data.end_) {
3359         space_data = &data;
3360         break;
3361       }
3362     }
3363     DCHECK_NE(space_data, nullptr);
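         // 'diff' translates an address in the linear-alloc space into the corresponding
         // address in its shadow map, where the updated page contents are prepared before
         // being mapped into place.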
3364     ptrdiff_t diff = space_data->shadow_.Begin() - space_data->begin_;
3365     size_t page_idx = DivideByPageSize(fault_page - space_data->begin_);
3366     Atomic<PageState>* state_arr =
3367         reinterpret_cast<Atomic<PageState>*>(space_data->page_status_map_.Begin());
3368     PageState state = state_arr[page_idx].load(std::memory_order_acquire);
3369     uint32_t backoff_count = 0;
3370     while (true) {
3371       switch (state) {
3372         case PageState::kUnprocessed: {
3373           // Acquire order to ensure we don't start writing to shadow map, which is
3374           // shared, before the CAS is successful.
3375           if (state_arr[page_idx].compare_exchange_strong(
3376                   state, PageState::kProcessing, std::memory_order_acquire)) {
3377             LinearAllocPageUpdater updater(this);
3378             uint8_t* first_obj = arena_iter->first->GetFirstObject(fault_page);
3379             // null first_obj indicates that it's a page from arena for
3380             // intern-table/class-table. So first object isn't required.
3381             if (first_obj != nullptr) {
3382               updater.MultiObjectArena(fault_page + diff, first_obj + diff);
3383             } else {
3384               updater.SingleObjectArena(fault_page + diff, gPageSize);
3385             }
3386             if (updater.WasLastPageTouched()) {
3387               state_arr[page_idx].store(PageState::kProcessed, std::memory_order_release);
3388               state = PageState::kProcessed;
3389               continue;
3390             } else {
3391               // If the page wasn't touched, then it is empty and most likely not
3392               // present on the shadow side. Furthermore, since the shadow is also
3393               // userfaultfd-registered, a copy ioctl would fail as the
3394               // copy-from-user in the kernel would itself cause a userfault.
3395               // Instead, just map a zeropage, which is not only correct but also
3396               // efficient as it avoids an unnecessary memcpy in the
3397               // kernel.
3398               if (ZeropageIoctl(fault_page,
3399                                 gPageSize,
3400                                 /*tolerate_eexist=*/false,
3401                                 tolerate_enoent)) {
3402                 state_arr[page_idx].store(PageState::kProcessedAndMapped,
3403                                           std::memory_order_release);
3404               }
3405               return;
3406             }
3407           }
3408         }
3409           continue;
3410         case PageState::kProcessed:
3411           // Map as many pages as possible in a single ioctl, without spending
3412           // time freeing pages.
3413           if (MapUpdatedLinearAllocPages(fault_page,
3414                                          fault_page + diff,
3415                                          state_arr + page_idx,
3416                                          space_data->end_ - fault_page,
3417                                          /*free_pages=*/false,
3418                                          /*single_ioctl=*/true,
3419                                          tolerate_enoent)) {
3420             return;
3421           }
3422           // fault_page was not mapped by this thread (some other thread claimed
3423           // it). Wait for it to be mapped before returning.
3424           FALLTHROUGH_INTENDED;
3425         case PageState::kProcessing:
3426         case PageState::kProcessingAndMapping:
3427         case PageState::kProcessedAndMapping:
3428           // Wait for the page to be mapped before returning.
3429           BackOff(backoff_count++);
3430           state = state_arr[page_idx].load(std::memory_order_acquire);
3431           continue;
3432         case PageState::kMutatorProcessing:
3433           LOG(FATAL) << "Unreachable";
3434           UNREACHABLE();
3435         case PageState::kProcessedAndMapped:
3436           // Somebody else took care of the page.
3437           return;
3438       }
3439       break;
3440     }
3441   }
3442 }
3443 
3444 void MarkCompact::ProcessLinearAlloc() {
3445   GcVisitedArenaPool* arena_pool =
3446       static_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool());
3447   DCHECK_EQ(thread_running_gc_, Thread::Current());
3448   uint8_t* unmapped_range_start = nullptr;
3449   uint8_t* unmapped_range_end = nullptr;
3450   // Pointer to the linear-alloc space containing the current arena in the loop
3451   // below. Also helps in ensuring that two arenas, which are contiguous in
3452   // address space but are from different linear-alloc spaces, are not coalesced
3453   // into one range for mapping purpose.
3454   LinearAllocSpaceData* space_data = nullptr;
3455   Atomic<PageState>* state_arr = nullptr;
3456   ptrdiff_t diff = 0;
3457 
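       // Maps the accumulated [unmapped_range_start, unmapped_range_end) range from the
       // shadow map into the linear-alloc space and then releases the shadow pages.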
3458   auto map_pages = [&]() {
3459     DCHECK_NE(diff, 0);
3460     DCHECK_NE(space_data, nullptr);
3461     DCHECK_GE(unmapped_range_start, space_data->begin_);
3462     DCHECK_LT(unmapped_range_start, space_data->end_);
3463     DCHECK_GT(unmapped_range_end, space_data->begin_);
3464     DCHECK_LE(unmapped_range_end, space_data->end_);
3465     DCHECK_LT(unmapped_range_start, unmapped_range_end);
3466     DCHECK_ALIGNED_PARAM(unmapped_range_end - unmapped_range_start, gPageSize);
3467     size_t page_idx = DivideByPageSize(unmapped_range_start - space_data->begin_);
3468     MapUpdatedLinearAllocPages(unmapped_range_start,
3469                                unmapped_range_start + diff,
3470                                state_arr + page_idx,
3471                                unmapped_range_end - unmapped_range_start,
3472                                /*free_pages=*/true,
3473                                /*single_ioctl=*/false,
3474                                /*tolerate_enoent=*/false);
3475   };
3476   for (auto& pair : linear_alloc_arenas_) {
3477     const TrackedArena* arena = pair.first;
3478     size_t arena_size = arena->Size();
3479     uint8_t* arena_begin = arena->Begin();
3480     // linear_alloc_arenas_ is sorted on arena-begin. So we will get all arenas
3481     // in that order.
3482     DCHECK_LE(unmapped_range_end, arena_begin);
3483     DCHECK(space_data == nullptr || arena_begin > space_data->begin_)
3484         << "space-begin:" << static_cast<void*>(space_data->begin_)
3485         << " arena-begin:" << static_cast<void*>(arena_begin);
3486     if (space_data == nullptr || space_data->end_ <= arena_begin) {
3487       // Map the processed arenas as we are switching to another space.
3488       if (space_data != nullptr && unmapped_range_end != nullptr) {
3489         map_pages();
3490         unmapped_range_end = nullptr;
3491       }
3492       // Find the linear-alloc space containing the arena
3493       LinearAllocSpaceData* curr_space_data = space_data;
3494       for (auto& data : linear_alloc_spaces_data_) {
3495         if (data.begin_ <= arena_begin && arena_begin < data.end_) {
3496           // Since arenas are sorted, the next space should be higher in address
3497           // order than the current one.
3498           DCHECK(space_data == nullptr || data.begin_ >= space_data->end_);
3499           diff = data.shadow_.Begin() - data.begin_;
3500           state_arr = reinterpret_cast<Atomic<PageState>*>(data.page_status_map_.Begin());
3501           space_data = &data;
3502           break;
3503         }
3504       }
3505       CHECK_NE(space_data, curr_space_data)
3506           << "Couldn't find space for arena-begin:" << static_cast<void*>(arena_begin);
3507     }
3508     // Map the processed arenas if we found a hole within the current space.
3509     if (unmapped_range_end != nullptr && unmapped_range_end < arena_begin) {
3510       map_pages();
3511       unmapped_range_end = nullptr;
3512     }
3513     if (unmapped_range_end == nullptr) {
3514       unmapped_range_start = unmapped_range_end = arena_begin;
3515     }
3516     DCHECK_NE(unmapped_range_start, nullptr);
3517     // It's ok to include all arenas in the unmapped range. Since the
3518     // corresponding state bytes will be kUnprocessed, we will skip calling
3519     // ioctl and madvise on arenas which are waiting to be deleted.
3520     unmapped_range_end += arena_size;
3521     {
3522       // Acquire the arena-pool's lock (in shared mode) so that the arena being updated
3523       // does not get deleted at the same time. If this critical section is too
3524       // long and impacts mutator response time, then we can get rid of this lock by
3525       // holding onto the memory ranges of all arenas deleted (since the compaction
3526       // pause) until compaction finishes.
3527       ReaderMutexLock rmu(thread_running_gc_, arena_pool->GetLock());
3528       // If any arenas were freed since compaction pause then skip them from
3529       // visiting.
3530       if (arena->IsWaitingForDeletion()) {
3531         continue;
3532       }
3533       uint8_t* last_byte = pair.second;
3534       DCHECK_ALIGNED_PARAM(last_byte, gPageSize);
3535       auto visitor = [space_data, last_byte, diff, this, state_arr](
3536                          uint8_t* page_begin,
3537                          uint8_t* first_obj,
3538                          size_t page_size) REQUIRES_SHARED(Locks::mutator_lock_) {
3539         // No need to process pages past last_byte as they already have updated
3540         // gc-roots, if any.
3541         if (page_begin >= last_byte) {
3542           return;
3543         }
3544         LinearAllocPageUpdater updater(this);
3545         size_t page_idx = DivideByPageSize(page_begin - space_data->begin_);
3546         DCHECK_LT(page_idx, space_data->page_status_map_.Size());
3547         PageState expected_state = PageState::kUnprocessed;
3548         // Acquire order to ensure that we don't start accessing the shadow page,
3549         // which is shared with other threads, prior to CAS. Also, for same
3550         // which is shared with other threads, prior to the CAS. Also, for the same
3551         // reason, we use 'release' order when changing the state to 'processed'.
3552                 expected_state, PageState::kProcessing, std::memory_order_acquire)) {
3553           // null first_obj indicates that it's a page from arena for
3554           // intern-table/class-table. So first object isn't required.
3555           if (first_obj != nullptr) {
3556             updater.MultiObjectArena(page_begin + diff, first_obj + diff);
3557           } else {
3558             DCHECK_EQ(page_size, gPageSize);
3559             updater.SingleObjectArena(page_begin + diff, page_size);
3560           }
3561           expected_state = PageState::kProcessing;
3562           // Store is sufficient as no other thread could be modifying it. Use
3563           // release order to ensure that the writes to the shadow page are
3564           // committed to memory before the state change becomes visible.
3565           if (updater.WasLastPageTouched()) {
3566             state_arr[page_idx].store(PageState::kProcessed, std::memory_order_release);
3567           } else {
3568             // See comment in ConcurrentlyProcessLinearAllocPage() with same situation.
3569             ZeropageIoctl(
3570                 page_begin, gPageSize, /*tolerate_eexist=*/false, /*tolerate_enoent=*/false);
3571             // Ioctl will act as release fence.
3572             state_arr[page_idx].store(PageState::kProcessedAndMapped, std::memory_order_release);
3573           }
3574         }
3575       };
3576 
3577       arena->VisitRoots(visitor);
3578     }
3579   }
3580   if (unmapped_range_end > unmapped_range_start) {
3581     // Map remaining pages.
3582     map_pages();
3583   }
3584 }
3585 
3586 void MarkCompact::RegisterUffd(void* addr, size_t size) {
3587   DCHECK(IsValidFd(uffd_));
3588   struct uffdio_register uffd_register;
3589   uffd_register.range.start = reinterpret_cast<uintptr_t>(addr);
3590   uffd_register.range.len = size;
3591   uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING;
3592   CHECK_EQ(ioctl(uffd_, UFFDIO_REGISTER, &uffd_register), 0)
3593       << "ioctl_userfaultfd: register failed: " << strerror(errno)
3594       << ". start:" << static_cast<void*>(addr) << " len:" << PrettySize(size);
3595 }
3596 
3597 // TODO: sometime we may want to tolerate certain error conditions (like ENOMEM
3598 // when we unregister the unused portion of the moving-space). Implement support
3599 // for that.
3600 void MarkCompact::UnregisterUffd(uint8_t* start, size_t len) {
3601   DCHECK(IsValidFd(uffd_));
3602   struct uffdio_range range;
3603   range.start = reinterpret_cast<uintptr_t>(start);
3604   range.len = len;
3605   CHECK_EQ(ioctl(uffd_, UFFDIO_UNREGISTER, &range), 0)
3606       << "ioctl_userfaultfd: unregister failed: " << strerror(errno)
3607       << ". addr:" << static_cast<void*>(start) << " len:" << PrettySize(len);
3608 }
3609 
3610 void MarkCompact::CompactionPhase() {
3611   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
3612   {
3613     int32_t freed_bytes = black_objs_slide_diff_;
3614     bump_pointer_space_->RecordFree(freed_objects_, freed_bytes);
3615     RecordFree(ObjectBytePair(freed_objects_, freed_bytes));
3616   }
3617 
3618   CompactMovingSpace<kCopyMode>(compaction_buffers_map_.Begin());
3619 
3620   ProcessLinearAlloc();
3621 
3622   auto wait_for_compaction_counter = [this](size_t idx) {
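         // Atomically set the compaction-done bit and fetch the number of SIGBUS handlers
         // already in flight for this stage, then spin (with backoff) until that in-flight
         // count drops to zero.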
3623     SigbusCounterType count = sigbus_in_progress_count_[idx].fetch_or(
3624         kSigbusCounterCompactionDoneMask, std::memory_order_acq_rel);
3625     // Wait for SIGBUS handlers already in play.
3626     for (uint32_t i = 0; count > 0; i++) {
3627       BackOff(i);
3628       count = sigbus_in_progress_count_[idx].load(std::memory_order_acquire);
3629       count &= ~kSigbusCounterCompactionDoneMask;
3630     }
3631   };
3632   // Set compaction-done bit in the first counter to indicate that gc-thread
3633   // is done compacting and mutators should stop incrementing this counter.
3634   // Mutators should tolerate ENOENT after this. This helps avoid priority
3635   // inversion in case mutators need to map zero-pages after compaction is
3636   // finished but before gc-thread manages to unregister the spaces.
3637   wait_for_compaction_counter(0);
3638 
3639   // Unregister moving-space
3640   size_t moving_space_size = bump_pointer_space_->Capacity();
3641   size_t used_size = (moving_first_objs_count_ + black_page_count_) * gPageSize;
3642   if (used_size > 0) {
3643     UnregisterUffd(bump_pointer_space_->Begin(), used_size);
3644   }
3645   // Unregister linear-alloc spaces
3646   for (auto& data : linear_alloc_spaces_data_) {
3647     DCHECK_EQ(data.end_ - data.begin_, static_cast<ssize_t>(data.shadow_.Size()));
3648     UnregisterUffd(data.begin_, data.shadow_.Size());
3649   }
3650 
3651   // Set compaction-done bit in the second counter to indicate that gc-thread
3652   // is done unregistering the spaces and therefore mutators, if in SIGBUS,
3653   // should return without attempting to map the faulted page. When the mutator
3654   // accesses the address again, it will succeed. Once this counter is 0,
3655   // the gc-thread can safely initialize/madvise the data structures.
3656   wait_for_compaction_counter(1);
3657 
3658   // Release all of the memory taken by moving-space's from-map
3659   from_space_map_.MadviseDontNeedAndZero();
3660   // mprotect(PROT_NONE) all maps except to-space in debug-mode to catch any unexpected accesses.
3661   DCHECK_EQ(mprotect(from_space_begin_, moving_space_size, PROT_NONE), 0)
3662       << "mprotect(PROT_NONE) for from-space failed: " << strerror(errno);
3663 
3664   // madvise the linear-alloc spaces' page-status arrays. Note that we don't need
3665   // to madvise the shadow-map as the pages from it were reclaimed in
3666   // ProcessLinearAlloc() after arenas were mapped.
3667   for (auto& data : linear_alloc_spaces_data_) {
3668     data.page_status_map_.MadviseDontNeedAndZero();
3669   }
3670 }
3671 
3672 template <size_t kBufferSize>
3673 class MarkCompact::ThreadRootsVisitor : public RootVisitor {
3674  public:
3675   explicit ThreadRootsVisitor(MarkCompact* mark_compact, Thread* const self)
3676         : mark_compact_(mark_compact), self_(self) {}
3677 
3678   ~ThreadRootsVisitor() {
3679     Flush();
3680   }
3681 
3682   void VisitRoots(mirror::Object*** roots,
3683                   size_t count,
3684                   [[maybe_unused]] const RootInfo& info) override
3685       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
3686     for (size_t i = 0; i < count; i++) {
3687       mirror::Object* obj = *roots[i];
3688       if (mark_compact_->MarkObjectNonNullNoPush</*kParallel*/true>(obj)) {
3689         Push(obj);
3690       }
3691     }
3692   }
3693 
3694   void VisitRoots(mirror::CompressedReference<mirror::Object>** roots,
3695                   size_t count,
3696                   [[maybe_unused]] const RootInfo& info) override
3697       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
3698     for (size_t i = 0; i < count; i++) {
3699       mirror::Object* obj = roots[i]->AsMirrorPtr();
3700       if (mark_compact_->MarkObjectNonNullNoPush</*kParallel*/true>(obj)) {
3701         Push(obj);
3702       }
3703     }
3704   }
3705 
3706  private:
3707   void Flush() REQUIRES_SHARED(Locks::mutator_lock_)
3708                REQUIRES(Locks::heap_bitmap_lock_) {
3709     StackReference<mirror::Object>* start;
3710     StackReference<mirror::Object>* end;
3711     {
3712       MutexLock mu(self_, mark_compact_->lock_);
3713       // Loop here because even after expanding once it may not be sufficient to
3714       // accommodate all references. That is highly unlikely, but there is no harm
3715       // in handling it this way.
3716       while (!mark_compact_->mark_stack_->BumpBack(idx_, &start, &end)) {
3717         mark_compact_->ExpandMarkStack();
3718       }
3719     }
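         // Copy the locally buffered roots into the [start, end) slot range just reserved
         // on the shared mark stack.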
3720     while (idx_ > 0) {
3721       *start++ = roots_[--idx_];
3722     }
3723     DCHECK_EQ(start, end);
3724   }
3725 
3726   void Push(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_)
3727                                  REQUIRES(Locks::heap_bitmap_lock_) {
3728     if (UNLIKELY(idx_ >= kBufferSize)) {
3729       Flush();
3730     }
3731     roots_[idx_++].Assign(obj);
3732   }
3733 
3734   StackReference<mirror::Object> roots_[kBufferSize];
3735   size_t idx_ = 0;
3736   MarkCompact* const mark_compact_;
3737   Thread* const self_;
3738 };
3739 
3740 class MarkCompact::CheckpointMarkThreadRoots : public Closure {
3741  public:
3742   explicit CheckpointMarkThreadRoots(MarkCompact* mark_compact) : mark_compact_(mark_compact) {}
3743 
3744   void Run(Thread* thread) override NO_THREAD_SAFETY_ANALYSIS {
3745     ScopedTrace trace("Marking thread roots");
3746     // Note: self is not necessarily equal to thread since thread may be
3747     // suspended.
3748     Thread* const self = Thread::Current();
3749     CHECK(thread == self
3750           || thread->IsSuspended()
3751           || thread->GetState() == ThreadState::kWaitingPerformingGc)
3752         << thread->GetState() << " thread " << thread << " self " << self;
3753     {
3754       ThreadRootsVisitor</*kBufferSize*/ 20> visitor(mark_compact_, self);
3755       thread->VisitRoots(&visitor, kVisitRootFlagAllRoots);
3756     }
3757     // Clear page-buffer to prepare for compaction phase.
3758     thread->SetThreadLocalGcBuffer(nullptr);
3759 
3760     // If thread is a running mutator, then act on behalf of the garbage
3761     // collector. See the code in ThreadList::RunCheckpoint.
3762     mark_compact_->GetBarrier().Pass(self);
3763   }
3764 
3765  private:
3766   MarkCompact* const mark_compact_;
3767 };
3768 
3769 void MarkCompact::MarkRootsCheckpoint(Thread* self, Runtime* runtime) {
3770   // We revoke TLABs later during the paused round of marking.
3771   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
3772   CheckpointMarkThreadRoots check_point(this);
3773   ThreadList* thread_list = runtime->GetThreadList();
3774   gc_barrier_.Init(self, 0);
3775   // Request the check point is run on all threads returning a count of the threads that must
3776   // run through the barrier including self.
3777   size_t barrier_count = thread_list->RunCheckpoint(&check_point);
3778   // Release locks then wait for all mutator threads to pass the barrier.
3779   // If there are no threads to wait for, which implies that all the checkpoint
3780   // functions have already finished, then there is no need to release the locks.
3781   if (barrier_count == 0) {
3782     return;
3783   }
3784   Locks::heap_bitmap_lock_->ExclusiveUnlock(self);
3785   Locks::mutator_lock_->SharedUnlock(self);
3786   {
3787     ScopedThreadStateChange tsc(self, ThreadState::kWaitingForCheckPointsToRun);
3788     gc_barrier_.Increment(self, barrier_count);
3789   }
3790   Locks::mutator_lock_->SharedLock(self);
3791   Locks::heap_bitmap_lock_->ExclusiveLock(self);
3792 }
3793 
3794 void MarkCompact::MarkNonThreadRoots(Runtime* runtime) {
3795   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
3796   runtime->VisitNonThreadRoots(this);
3797 }
3798 
3799 void MarkCompact::MarkConcurrentRoots(VisitRootFlags flags, Runtime* runtime) {
3800   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
3801   runtime->VisitConcurrentRoots(this, flags);
3802 }
3803 
3804 void MarkCompact::RevokeAllThreadLocalBuffers() {
3805   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
3806   bump_pointer_space_->RevokeAllThreadLocalBuffers();
3807 }
3808 
3809 class MarkCompact::ScanObjectVisitor {
3810  public:
3811   explicit ScanObjectVisitor(MarkCompact* const mark_compact) ALWAYS_INLINE
3812       : mark_compact_(mark_compact) {}
3813 
3814   void operator()(ObjPtr<mirror::Object> obj) const
3815       ALWAYS_INLINE
3816       REQUIRES(Locks::heap_bitmap_lock_)
3817       REQUIRES_SHARED(Locks::mutator_lock_) {
3818     mark_compact_->ScanObject</*kUpdateLiveWords*/ false>(obj.Ptr());
3819   }
3820 
3821  private:
3822   MarkCompact* const mark_compact_;
3823 };
3824 
3825 void MarkCompact::UpdateAndMarkModUnion() {
3826   accounting::CardTable* const card_table = heap_->GetCardTable();
3827   for (const auto& space : immune_spaces_.GetSpaces()) {
3828     const char* name = space->IsZygoteSpace()
3829         ? "UpdateAndMarkZygoteModUnionTable"
3830         : "UpdateAndMarkImageModUnionTable";
3831     DCHECK(space->IsZygoteSpace() || space->IsImageSpace()) << *space;
3832     TimingLogger::ScopedTiming t(name, GetTimings());
3833     accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
3834     if (table != nullptr) {
3835       // UpdateAndMarkReferences() doesn't visit Reference-type objects. But
3836       // that's fine because these objects are immutable enough (referent can
3837       // only be cleared) and hence the only referents they can have are intra-space.
3838       table->UpdateAndMarkReferences(this);
3839     } else {
3840       // No mod-union table, scan all dirty/aged cards in the corresponding
3841       // card-table. This can only occur for app images.
3842       card_table->Scan</*kClearCard*/ false>(space->GetMarkBitmap(),
3843                                              space->Begin(),
3844                                              space->End(),
3845                                              ScanObjectVisitor(this),
3846                                              gc::accounting::CardTable::kCardAged);
3847     }
3848   }
3849 }
3850 
3851 void MarkCompact::MarkReachableObjects() {
3852   UpdateAndMarkModUnion();
3853   // Recursively mark all the non-image bits set in the mark bitmap.
3854   ProcessMarkStack();
3855 }
3856 
3857 void MarkCompact::ScanDirtyObjects(bool paused, uint8_t minimum_age) {
3858   accounting::CardTable* card_table = heap_->GetCardTable();
3859   for (const auto& space : heap_->GetContinuousSpaces()) {
3860     const char* name = nullptr;
3861     switch (space->GetGcRetentionPolicy()) {
3862     case space::kGcRetentionPolicyNeverCollect:
3863       name = paused ? "(Paused)ScanGrayImmuneSpaceObjects" : "ScanGrayImmuneSpaceObjects";
3864       break;
3865     case space::kGcRetentionPolicyFullCollect:
3866       name = paused ? "(Paused)ScanGrayZygoteSpaceObjects" : "ScanGrayZygoteSpaceObjects";
3867       break;
3868     case space::kGcRetentionPolicyAlwaysCollect:
3869       name = paused ? "(Paused)ScanGrayAllocSpaceObjects" : "ScanGrayAllocSpaceObjects";
3870       break;
3871     }
3872     TimingLogger::ScopedTiming t(name, GetTimings());
3873     card_table->Scan</*kClearCard*/ false>(
3874         space->GetMarkBitmap(), space->Begin(), space->End(), ScanObjectVisitor(this), minimum_age);
3875   }
3876 }
3877 
3878 void MarkCompact::RecursiveMarkDirtyObjects(bool paused, uint8_t minimum_age) {
3879   ScanDirtyObjects(paused, minimum_age);
3880   ProcessMarkStack();
3881 }
3882 
3883 void MarkCompact::MarkRoots(VisitRootFlags flags) {
3884   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
3885   Runtime* runtime = Runtime::Current();
3886   // Make sure that the checkpoint which collects the stack roots is the first
3887   // one capturing GC-roots, as it is supposed to find the address after which
3888   // everything allocated during this marking phase will be considered
3889   // 'marked'.
3890   MarkRootsCheckpoint(thread_running_gc_, runtime);
3891   MarkNonThreadRoots(runtime);
3892   MarkConcurrentRoots(flags, runtime);
3893 }
3894 
3895 void MarkCompact::PreCleanCards() {
3896   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
3897   CHECK(!Locks::mutator_lock_->IsExclusiveHeld(thread_running_gc_));
3898   // Age the card-table before thread stack scanning checkpoint in MarkRoots()
3899   // as it ensures that there are no in-progress write barriers which started
3900   // prior to aging the card-table.
3901   PrepareCardTableForMarking(/*clear_alloc_space_cards*/ false);
3902   MarkRoots(static_cast<VisitRootFlags>(kVisitRootFlagClearRootLog | kVisitRootFlagNewRoots));
3903   RecursiveMarkDirtyObjects(/*paused*/ false, accounting::CardTable::kCardDirty - 1);
3904 }
3905 
3906 // In a concurrent marking algorithm, if we are not using a write/read barrier, as
3907 // in this case, then we need a stop-the-world (STW) round in the end to mark
3908 // objects which were written into concurrently while concurrent marking was
3909 // performed.
3910 // In order to minimize the pause time, we could take one of the two approaches:
3911 // 1. Keep repeating concurrent marking of dirty cards until the time spent goes
3912 // below a threshold.
3913 // 2. Do two rounds concurrently and then attempt a paused one. If we figure
3914 // that it's taking too long, then resume mutators and retry.
3915 //
3916 // Given the non-trivial fixed overhead of running a round (card table and root
3917 // scan), it might be better to go with approach 2.
3918 void MarkCompact::MarkingPhase() {
3919   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
3920   DCHECK_EQ(thread_running_gc_, Thread::Current());
3921   WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_);
3922   MaybeClampGcStructures();
3923   PrepareCardTableForMarking(/*clear_alloc_space_cards*/ true);
3924   MarkZygoteLargeObjects();
3925   MarkRoots(
3926         static_cast<VisitRootFlags>(kVisitRootFlagAllRoots | kVisitRootFlagStartLoggingNewRoots));
3927   MarkReachableObjects();
3928   // Pre-clean dirtied cards to reduce pauses.
3929   PreCleanCards();
3930 
3931   // Setup reference processing and forward soft references once before enabling
3932   // slow path (in MarkingPause)
3933   ReferenceProcessor* rp = GetHeap()->GetReferenceProcessor();
3934   bool clear_soft_references = GetCurrentIteration()->GetClearSoftReferences();
3935   rp->Setup(thread_running_gc_, this, /*concurrent=*/ true, clear_soft_references);
3936   if (!clear_soft_references) {
3937     // Forward as many SoftReferences as possible before inhibiting reference access.
3938     rp->ForwardSoftReferences(GetTimings());
3939   }
3940 }
3941 
3942 class MarkCompact::RefFieldsVisitor {
3943  public:
3944   ALWAYS_INLINE explicit RefFieldsVisitor(MarkCompact* const mark_compact)
3945     : mark_compact_(mark_compact) {}
3946 
3947   ALWAYS_INLINE void operator()(mirror::Object* obj,
3948                                 MemberOffset offset,
3949                                 [[maybe_unused]] bool is_static) const
3950       REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
3951     if (kCheckLocks) {
3952       Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
3953       Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
3954     }
3955     mark_compact_->MarkObject(obj->GetFieldObject<mirror::Object>(offset), obj, offset);
3956   }
3957 
3958   void operator()(ObjPtr<mirror::Class> klass, ObjPtr<mirror::Reference> ref) const ALWAYS_INLINE
3959       REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
3960     mark_compact_->DelayReferenceReferent(klass, ref);
3961   }
3962 
3963   void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const ALWAYS_INLINE
3964       REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
3965     if (!root->IsNull()) {
3966       VisitRoot(root);
3967     }
3968   }
3969 
3970   void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
3971       REQUIRES(Locks::heap_bitmap_lock_)
3972       REQUIRES_SHARED(Locks::mutator_lock_) {
3973     if (kCheckLocks) {
3974       Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
3975       Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
3976     }
3977     mark_compact_->MarkObject(root->AsMirrorPtr());
3978   }
3979 
3980  private:
3981   MarkCompact* const mark_compact_;
3982 };
3983 
3984 template <size_t kAlignment>
3985 size_t MarkCompact::LiveWordsBitmap<kAlignment>::LiveBytesInBitmapWord(size_t chunk_idx) const {
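       // Each chunk corresponds to kBitmapWordsPerVectorWord consecutive words of the
       // live-words bitmap; count their set bits and scale by kAlignment to get the
       // number of live bytes in the chunk.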
3986   const size_t index = chunk_idx * kBitmapWordsPerVectorWord;
3987   size_t words = 0;
3988   for (uint32_t i = 0; i < kBitmapWordsPerVectorWord; i++) {
3989     words += POPCOUNT(Bitmap::Begin()[index + i]);
3990   }
3991   return words * kAlignment;
3992 }
3993 
3994 void MarkCompact::UpdateLivenessInfo(mirror::Object* obj, size_t obj_size) {
3995   DCHECK(obj != nullptr);
3996   DCHECK_EQ(obj_size, obj->SizeOf<kDefaultVerifyFlags>());
3997   uintptr_t obj_begin = reinterpret_cast<uintptr_t>(obj);
3998   UpdateClassAfterObjectMap(obj);
3999   size_t size = RoundUp(obj_size, kAlignment);
4000   uintptr_t bit_index = live_words_bitmap_->SetLiveWords(obj_begin, size);
4001   size_t chunk_idx = (obj_begin - live_words_bitmap_->Begin()) / kOffsetChunkSize;
4002   // Compute the bit-index within the chunk-info vector word.
4003   bit_index %= kBitsPerVectorWord;
4004   size_t first_chunk_portion = std::min(size, (kBitsPerVectorWord - bit_index) * kAlignment);
4005 
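       // Distribute the object's (aligned) size over the chunk-info entries it spans:
       // the remainder of the current chunk first, then whole chunks, then the tail.
       // Illustrative example with hypothetical sizes (assuming a 4 KB chunk): a 10 KB
       // object starting 1 KB before a chunk boundary contributes 1 KB + 4 KB + 4 KB +
       // 1 KB to four consecutive entries.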
4006   chunk_info_vec_[chunk_idx++] += first_chunk_portion;
4007   DCHECK_LE(first_chunk_portion, size);
4008   for (size -= first_chunk_portion; size > kOffsetChunkSize; size -= kOffsetChunkSize) {
4009     DCHECK_EQ(chunk_info_vec_[chunk_idx], 0u);
4010     chunk_info_vec_[chunk_idx++] = kOffsetChunkSize;
4011   }
4012   chunk_info_vec_[chunk_idx] += size;
4013   freed_objects_--;
4014 }
4015 
4016 template <bool kUpdateLiveWords>
4017 void MarkCompact::ScanObject(mirror::Object* obj) {
4018   mirror::Class* klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
4019   // TODO(lokeshgidra): Remove the following condition once b/373609505 is fixed.
4020   if (UNLIKELY(klass == nullptr)) {
4021     // It was seen in ConcurrentCopying GC that after a small wait when we reload
4022     // the class pointer, it turns out to be a valid class object. So as a workaround,
4023     // we can continue execution and log an error that this happened.
4024     for (size_t i = 0; i < 1000; i++) {
4025       // Wait for 1ms at a time. Don't wait for more than 1 second in total.
4026       usleep(1000);
4027       klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
4028       if (klass != nullptr) {
4029         std::ostringstream oss;
4030         klass->DumpClass(oss, mirror::Class::kDumpClassFullDetail);
4031         LOG(FATAL_WITHOUT_ABORT) << "klass pointer for obj: " << obj
4032                                  << " found to be null first. Reloading after " << i
4033                                  << " iterations of 1ms sleep fetched klass: " << oss.str();
4034         break;
4035       }
4036     }
4037 
4038     if (UNLIKELY(klass == nullptr)) {
4039       // It must be heap corruption.
4040       LOG(FATAL_WITHOUT_ABORT) << "klass pointer for obj: " << obj << " found to be null.";
4041     }
4042     heap_->GetVerification()->LogHeapCorruption(
4043         obj, mirror::Object::ClassOffset(), klass, /*fatal=*/true);
4044   }
4045   // The size of `obj` is used both here (to update `bytes_scanned_`) and in
4046   // `UpdateLivenessInfo`. As fetching this value can be expensive, do it once
4047   // here and pass that information to `UpdateLivenessInfo`.
4048   size_t obj_size = obj->SizeOf<kDefaultVerifyFlags>();
4049   bytes_scanned_ += obj_size;
4050 
4051   RefFieldsVisitor visitor(this);
4052   DCHECK(IsMarked(obj)) << "Scanning unmarked object " << obj << "\n" << heap_->DumpSpaces();
4053   if (kUpdateLiveWords && HasAddress(obj)) {
4054     UpdateLivenessInfo(obj, obj_size);
4055   }
4056   obj->VisitReferences(visitor, visitor);
4057 }
4058 
4059 // Scan anything that's on the mark stack.
4060 void MarkCompact::ProcessMarkStack() {
4061   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4062   // TODO: try prefetch like in CMS
4063   while (!mark_stack_->IsEmpty()) {
4064     mirror::Object* obj = mark_stack_->PopBack();
4065     DCHECK(obj != nullptr);
4066     ScanObject</*kUpdateLiveWords*/ true>(obj);
4067   }
4068 }
4069 
4070 void MarkCompact::ExpandMarkStack() {
4071   const size_t new_size = mark_stack_->Capacity() * 2;
4072   std::vector<StackReference<mirror::Object>> temp(mark_stack_->Begin(),
4073                                                    mark_stack_->End());
4074   mark_stack_->Resize(new_size);
4075   for (auto& ref : temp) {
4076     mark_stack_->PushBack(ref.AsMirrorPtr());
4077   }
4078   DCHECK(!mark_stack_->IsFull());
4079 }
4080 
4081 inline void MarkCompact::PushOnMarkStack(mirror::Object* obj) {
4082   if (UNLIKELY(mark_stack_->IsFull())) {
4083     ExpandMarkStack();
4084   }
4085   mark_stack_->PushBack(obj);
4086 }
4087 
4088 inline void MarkCompact::MarkObjectNonNull(mirror::Object* obj,
4089                                            mirror::Object* holder,
4090                                            MemberOffset offset) {
4091   DCHECK(obj != nullptr);
4092   if (MarkObjectNonNullNoPush</*kParallel*/false>(obj, holder, offset)) {
4093     PushOnMarkStack(obj);
4094   }
4095 }
4096 
4097 template <bool kParallel>
4098 inline bool MarkCompact::MarkObjectNonNullNoPush(mirror::Object* obj,
4099                                                  mirror::Object* holder,
4100                                                  MemberOffset offset) {
4101   // We expect most of the references to be in bump-pointer space, so try that
4102   // first to keep the cost of this function minimal.
4103   if (LIKELY(HasAddress(obj))) {
4104     return kParallel ? !moving_space_bitmap_->AtomicTestAndSet(obj)
4105                      : !moving_space_bitmap_->Set(obj);
4106   } else if (non_moving_space_bitmap_->HasAddress(obj)) {
4107     return kParallel ? !non_moving_space_bitmap_->AtomicTestAndSet(obj)
4108                      : !non_moving_space_bitmap_->Set(obj);
4109   } else if (immune_spaces_.ContainsObject(obj)) {
4110     DCHECK(IsMarked(obj) != nullptr);
4111     return false;
4112   } else {
4113     // Must be a large-object space, otherwise it's a case of heap corruption.
4114     if (!IsAlignedParam(obj, space::LargeObjectSpace::ObjectAlignment())) {
4115       // Objects in large-object space are aligned to the large-object alignment.
4116       // So if we have an object which doesn't belong to any space and is not
4117       // page-aligned as well, then it's memory corruption.
4118       // TODO: implement protect/unprotect in bump-pointer space.
4119       heap_->GetVerification()->LogHeapCorruption(holder, offset, obj, /*fatal*/ true);
4120     }
4121     DCHECK_NE(heap_->GetLargeObjectsSpace(), nullptr)
4122         << "ref=" << obj
4123         << " doesn't belong to any of the spaces and large object space doesn't exist";
4124     accounting::LargeObjectBitmap* los_bitmap = heap_->GetLargeObjectsSpace()->GetMarkBitmap();
4125     DCHECK(los_bitmap->HasAddress(obj));
4126     if (kParallel) {
4127       los_bitmap->AtomicTestAndSet(obj);
4128     } else {
4129       los_bitmap->Set(obj);
4130     }
4131     // We only have strings and primitive arrays in large object space, so there
4132     // is no reason to push them onto the mark-stack.
4133     DCHECK(obj->IsString() || (obj->IsArrayInstance() && !obj->IsObjectArray()));
4134     return false;
4135   }
4136 }
4137 
4138 inline void MarkCompact::MarkObject(mirror::Object* obj,
4139                                     mirror::Object* holder,
4140                                     MemberOffset offset) {
4141   if (obj != nullptr) {
4142     MarkObjectNonNull(obj, holder, offset);
4143   }
4144 }
4145 
4146 mirror::Object* MarkCompact::MarkObject(mirror::Object* obj) {
4147   MarkObject(obj, nullptr, MemberOffset(0));
4148   return obj;
4149 }
4150 
4151 void MarkCompact::MarkHeapReference(mirror::HeapReference<mirror::Object>* obj,
4152                                     [[maybe_unused]] bool do_atomic_update) {
4153   MarkObject(obj->AsMirrorPtr(), nullptr, MemberOffset(0));
4154 }
4155 
4156 void MarkCompact::VisitRoots(mirror::Object*** roots,
4157                              size_t count,
4158                              const RootInfo& info) {
4159   if (compacting_) {
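         // Objects below black_dense_end_ retain their addresses, so UpdateRoot() is given
         // [black_dense_end_, moving_space_end_) as the range of addresses that may move.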
4160     uint8_t* moving_space_begin = black_dense_end_;
4161     uint8_t* moving_space_end = moving_space_end_;
4162     for (size_t i = 0; i < count; ++i) {
4163       UpdateRoot(roots[i], moving_space_begin, moving_space_end, info);
4164     }
4165   } else {
4166     for (size_t i = 0; i < count; ++i) {
4167       MarkObjectNonNull(*roots[i]);
4168     }
4169   }
4170 }
4171 
4172 void MarkCompact::VisitRoots(mirror::CompressedReference<mirror::Object>** roots,
4173                              size_t count,
4174                              const RootInfo& info) {
4175   // TODO: do we need to check if the root is null or not?
4176   if (compacting_) {
4177     uint8_t* moving_space_begin = black_dense_end_;
4178     uint8_t* moving_space_end = moving_space_end_;
4179     for (size_t i = 0; i < count; ++i) {
4180       UpdateRoot(roots[i], moving_space_begin, moving_space_end, info);
4181     }
4182   } else {
4183     for (size_t i = 0; i < count; ++i) {
4184       MarkObjectNonNull(roots[i]->AsMirrorPtr());
4185     }
4186   }
4187 }
4188 
4189 mirror::Object* MarkCompact::IsMarked(mirror::Object* obj) {
4190   if (HasAddress(obj)) {
4191     const bool is_black = reinterpret_cast<uint8_t*>(obj) >= black_allocations_begin_;
4192     if (compacting_) {
4193       if (is_black) {
4194         return PostCompactBlackObjAddr(obj);
4195       } else if (moving_space_bitmap_->Test(obj)) {
4196         if (reinterpret_cast<uint8_t*>(obj) < black_dense_end_) {
4197           return obj;
4198         } else {
4199           return PostCompactOldObjAddr(obj);
4200         }
4201       } else {
4202         return nullptr;
4203       }
4204     }
4205     return (is_black || moving_space_bitmap_->Test(obj)) ? obj : nullptr;
4206   } else if (non_moving_space_bitmap_->HasAddress(obj)) {
4207     if (non_moving_space_bitmap_->Test(obj)) {
4208       return obj;
4209     }
4210   } else if (immune_spaces_.ContainsObject(obj)) {
4211     return obj;
4212   } else {
4213     DCHECK(heap_->GetLargeObjectsSpace())
4214         << "ref=" << obj
4215         << " doesn't belong to any of the spaces and large object space doesn't exist";
4216     accounting::LargeObjectBitmap* los_bitmap = heap_->GetLargeObjectsSpace()->GetMarkBitmap();
4217     if (los_bitmap->HasAddress(obj)) {
4218       DCHECK(IsAlignedParam(obj, space::LargeObjectSpace::ObjectAlignment()));
4219       if (los_bitmap->Test(obj)) {
4220         return obj;
4221       }
4222     } else {
4223       // The given obj is not in any of the known spaces, so return null. This could
4224       // happen, for instance, with interpreter caches, where a concurrent update to
4225       // the cache could result in obj being a non-reference. This is tolerable
4226       // because SweepInterpreterCaches only updates an entry if the given object
4227       // has moved, which cannot be the case for a non-reference.
4228       return nullptr;
4229     }
4230   }
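       // Objects allocated after marking started sit on the allocation stack and are
       // treated as implicitly live, but only once marking is complete.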
4231   return marking_done_ && IsOnAllocStack(obj) ? obj : nullptr;
4232 }
4233 
4234 bool MarkCompact::IsNullOrMarkedHeapReference(mirror::HeapReference<mirror::Object>* obj,
4235                                               [[maybe_unused]] bool do_atomic_update) {
4236   mirror::Object* ref = obj->AsMirrorPtr();
4237   if (ref == nullptr) {
4238     return true;
4239   }
4240   return IsMarked(ref);
4241 }
4242 
4243 // Process the 'referent' field in a java.lang.ref.Reference. If the referent
4244 // has not yet been marked, put it on the appropriate list in the heap for later
4245 // processing.
4246 void MarkCompact::DelayReferenceReferent(ObjPtr<mirror::Class> klass,
4247                                          ObjPtr<mirror::Reference> ref) {
4248   heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, ref, this);
4249 }
4250 
4251 void MarkCompact::FinishPhase() {
4252   GetCurrentIteration()->SetScannedBytes(bytes_scanned_);
4253   bool is_zygote = Runtime::Current()->IsZygote();
4254   compacting_ = false;
4255   marking_done_ = false;
4256 
4257   ZeroAndReleaseMemory(compaction_buffers_map_.Begin(), compaction_buffers_map_.Size());
4258   info_map_.MadviseDontNeedAndZero();
4259   live_words_bitmap_->ClearBitmap();
4260   if (moving_space_begin_ == black_dense_end_) {
4261     moving_space_bitmap_->Clear();
4262   } else {
4263     DCHECK_LT(moving_space_begin_, black_dense_end_);
4264     DCHECK_LE(black_dense_end_, moving_space_end_);
4265     moving_space_bitmap_->ClearRange(reinterpret_cast<mirror::Object*>(black_dense_end_),
4266                                      reinterpret_cast<mirror::Object*>(moving_space_end_));
4267   }
4268   bump_pointer_space_->SetBlackDenseRegionSize(black_dense_end_ - moving_space_begin_);
4269 
4270   if (UNLIKELY(is_zygote && IsValidFd(uffd_))) {
4271     // This unregisters all ranges as a side-effect.
4272     close(uffd_);
4273     uffd_ = kFdUnused;
4274     uffd_initialized_ = false;
4275   }
4276   CHECK(mark_stack_->IsEmpty());  // Ensure that the mark stack is empty.
4277   mark_stack_->Reset();
4278   DCHECK_EQ(thread_running_gc_, Thread::Current());
4279   if (kIsDebugBuild) {
4280     MutexLock mu(thread_running_gc_, lock_);
4281     if (updated_roots_.get() != nullptr) {
4282       updated_roots_->clear();
4283     }
4284   }
4285   class_after_obj_map_.clear();
4286   linear_alloc_arenas_.clear();
4287   {
4288     ReaderMutexLock mu(thread_running_gc_, *Locks::mutator_lock_);
4289     WriterMutexLock mu2(thread_running_gc_, *Locks::heap_bitmap_lock_);
4290     heap_->ClearMarkedObjects();
4291   }
4292   GcVisitedArenaPool* arena_pool =
4293       static_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool());
4294   arena_pool->DeleteUnusedArenas();
4295 }
4296 
4297 }  // namespace collector
4298 }  // namespace gc
4299 }  // namespace art
4300