1 /*
2 * Copyright 2021 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <fcntl.h>
18 // Glibc v2.19 doesn't include these in fcntl.h, so host builds will fail without it.
19 #if !defined(FALLOC_FL_PUNCH_HOLE) || !defined(FALLOC_FL_KEEP_SIZE)
20 #include <linux/falloc.h>
21 #endif
22 #include <linux/userfaultfd.h>
23 #include <poll.h>
24 #include <sys/ioctl.h>
25 #include <sys/mman.h>
26 #include <sys/resource.h>
27 #include <sys/stat.h>
28 #include <sys/syscall.h>
29 #include <unistd.h>
30
31 #include <fstream>
32 #include <numeric>
33 #include <string>
34 #include <string_view>
35 #include <vector>
36
37 #include "android-base/file.h"
38 #include "android-base/parsebool.h"
39 #include "android-base/parseint.h"
40 #include "android-base/properties.h"
41 #include "android-base/strings.h"
42 #include "base/file_utils.h"
43 #include "base/memfd.h"
44 #include "base/quasi_atomic.h"
45 #include "base/systrace.h"
46 #include "base/utils.h"
47 #include "gc/accounting/mod_union_table-inl.h"
48 #include "gc/collector_type.h"
49 #include "gc/reference_processor.h"
50 #include "gc/space/bump_pointer_space.h"
51 #include "gc/task_processor.h"
52 #include "gc/verification-inl.h"
53 #include "jit/jit_code_cache.h"
54 #include "mark_compact-inl.h"
55 #include "mirror/object-refvisitor-inl.h"
56 #include "read_barrier_config.h"
57 #include "scoped_thread_state_change-inl.h"
58 #include "sigchain.h"
59 #include "thread_list.h"
60
61 #ifdef ART_TARGET_ANDROID
62 #include "android-modules-utils/sdk_level.h"
63 #include "com_android_art.h"
64 #endif
65
66 #ifndef __BIONIC__
67 #ifndef MREMAP_DONTUNMAP
68 #define MREMAP_DONTUNMAP 4
69 #endif
70 #endif // __BIONIC__
71
72 // See aosp/2996596 for where these values came from.
73 #ifndef UFFDIO_COPY_MODE_MMAP_TRYLOCK
74 #define UFFDIO_COPY_MODE_MMAP_TRYLOCK (static_cast<uint64_t>(1) << 63)
75 #endif
76 #ifndef UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK
77 #define UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK (static_cast<uint64_t>(1) << 63)
78 #endif
79 #ifdef ART_TARGET_ANDROID
80 namespace {
81
82 using ::android::base::GetBoolProperty;
83 using ::android::base::ParseBool;
84 using ::android::base::ParseBoolResult;
85 using ::android::modules::sdklevel::IsAtLeastV;
86
87 } // namespace
88 #endif
89
90 namespace art HIDDEN {
91
92 static bool HaveMremapDontunmap() {
93 const size_t page_size = GetPageSizeSlow();
94 void* old = mmap(nullptr, page_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
95 CHECK_NE(old, MAP_FAILED);
96 void* addr = mremap(old, page_size, page_size, MREMAP_MAYMOVE | MREMAP_DONTUNMAP, nullptr);
97 CHECK_EQ(munmap(old, page_size), 0);
98 if (addr != MAP_FAILED) {
99 CHECK_EQ(munmap(addr, page_size), 0);
100 return true;
101 } else {
102 return false;
103 }
104 }
105
106 static bool gUffdSupportsMmapTrylock = false;
107 // We require the MREMAP_DONTUNMAP functionality of the mremap syscall, which was
108 // introduced in kernel version 5.13 but has been backported to GKI kernels.
109 static bool gHaveMremapDontunmap = IsKernelVersionAtLeast(5, 13) || HaveMremapDontunmap();
110 // Bitmap of features supported by userfaultfd. This is obtained via uffd API ioctl.
111 static uint64_t gUffdFeatures = 0;
112 // Both missing and minor faults on shmem are needed only for minor-fault mode.
113 static constexpr uint64_t kUffdFeaturesForMinorFault =
114 UFFD_FEATURE_MISSING_SHMEM | UFFD_FEATURE_MINOR_SHMEM;
115 static constexpr uint64_t kUffdFeaturesForSigbus = UFFD_FEATURE_SIGBUS;
116 // A region which is more than kBlackDenseRegionThreshold percent live doesn't
117 // need to be compacted as it is too densely packed.
118 static constexpr uint kBlackDenseRegionThreshold = 95U;
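// As a worked illustration (numbers are hypothetical, not from this file): with
// 4 KiB pages and a threshold of 95, a page needs roughly 0.95 * 4096 ≈ 3891
// live bytes for the live/total ratio checks in PrepareForCompaction() to keep
// growing the black-dense prefix.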
119 // We consider the SIGBUS feature necessary to enable this GC, as it is superior to
120 // the threading-based implementation in terms of jank. We may want the minor-fault
121 // feature in the future to make jit-code-cache updates (which use shmem) concurrent.
122 bool KernelSupportsUffd() {
123 #ifdef __linux__
124 if (gHaveMremapDontunmap) {
125 int fd = syscall(__NR_userfaultfd, O_CLOEXEC | UFFD_USER_MODE_ONLY);
126 // On non-android devices we may not have the kernel patches that restrict
127 // userfaultfd to user mode. But that is not a security concern as we are
128 // on host. Therefore, attempt one more time without UFFD_USER_MODE_ONLY.
129 if (!kIsTargetAndroid && fd == -1 && errno == EINVAL) {
130 fd = syscall(__NR_userfaultfd, O_CLOEXEC);
131 }
132 if (fd >= 0) {
133 // We are only fetching the available features, which is returned by the
134 // ioctl.
135 struct uffdio_api api = {.api = UFFD_API, .features = 0, .ioctls = 0};
136 CHECK_EQ(ioctl(fd, UFFDIO_API, &api), 0) << "ioctl_userfaultfd : API:" << strerror(errno);
137 gUffdFeatures = api.features;
138 // MMAP_TRYLOCK is available only in 5.10 and 5.15 GKI kernels. The higher
139 // versions will have per-vma locks. The lower ones don't support
140 // userfaultfd.
141 if (kIsTargetAndroid && !IsKernelVersionAtLeast(5, 16)) {
142 // Check if MMAP_TRYLOCK feature is supported
143 const size_t page_size = GetPageSizeSlow();
144 void* mem =
145 mmap(nullptr, page_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
146 CHECK_NE(mem, MAP_FAILED) << " errno: " << errno;
147
148 struct uffdio_zeropage uffd_zeropage;
149 uffd_zeropage.mode = UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK;
150 uffd_zeropage.range.start = reinterpret_cast<uintptr_t>(mem);
151 uffd_zeropage.range.len = page_size;
152 uffd_zeropage.zeropage = 0;
153 // The ioctl will definitely fail as mem is not registered with uffd.
154 CHECK_EQ(ioctl(fd, UFFDIO_ZEROPAGE, &uffd_zeropage), -1);
155 // uffd ioctls can return EINVAL for several reasons. By ensuring proper
156 // alignment of 'mem' and 'len', we guarantee that if the ioctl fails with
157 // EINVAL before updating uffd_zeropage.zeropage (with an error), it can
158 // only be because `trylock` isn't available.
159 if (uffd_zeropage.zeropage == 0 && errno == EINVAL) {
160 LOG(INFO) << "MMAP_TRYLOCK is not supported in uffd addr:" << mem
161 << " page-size:" << page_size;
162 } else {
163 gUffdSupportsMmapTrylock = true;
164 LOG(INFO) << "MMAP_TRYLOCK is supported in uffd errno:" << errno << " addr:" << mem
165 << " size:" << page_size;
166 }
167 munmap(mem, page_size);
168 }
169 close(fd);
170 // The minimum we need is the SIGBUS feature in order to use userfaultfd.
171 return (api.features & kUffdFeaturesForSigbus) == kUffdFeaturesForSigbus;
172 }
173 }
174 #endif
175 return false;
176 }
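// For context, a minimal sketch (not code from this file) of how an fd created
// with UFFD_FEATURE_SIGBUS is later put to use: a memory range is registered for
// missing faults, after which an unresolved fault delivers SIGBUS to the faulting
// thread instead of blocking it until the GC serves the page:
//
//   struct uffdio_register reg = {};
//   reg.range.start = reinterpret_cast<uintptr_t>(range_begin);  // hypothetical range
//   reg.range.len = range_len;
//   reg.mode = UFFDIO_REGISTER_MODE_MISSING;
//   CHECK_EQ(ioctl(uffd, UFFDIO_REGISTER, &reg), 0);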
177
178 // The other cases are defined as constexpr in runtime/read_barrier_config.h
179 #if !defined(ART_FORCE_USE_READ_BARRIER) && defined(ART_USE_READ_BARRIER)
180 // Returns collector type asked to be used on the cmdline.
181 static gc::CollectorType FetchCmdlineGcType() {
182 std::string argv;
183 gc::CollectorType gc_type = gc::CollectorType::kCollectorTypeNone;
184 if (android::base::ReadFileToString("/proc/self/cmdline", &argv)) {
185 auto pos = argv.rfind("-Xgc:");
186 if (pos != std::string::npos && argv.substr(pos + 5, 3) == "CMC") {
187 gc_type = gc::CollectorType::kCollectorTypeCMC;
188 } else if (pos != std::string::npos && argv.substr(pos + 5, 2) == "CC") {
189 gc_type = gc::CollectorType::kCollectorTypeCC;
190 }
191 }
192 return gc_type;
193 }
194
195 #ifdef ART_TARGET_ANDROID
196 static int GetOverrideCacheInfoFd() {
197 std::string args_str;
198 if (!android::base::ReadFileToString("/proc/self/cmdline", &args_str)) {
199 LOG(WARNING) << "Failed to load /proc/self/cmdline";
200 return -1;
201 }
202 std::vector<std::string_view> args;
203 Split(std::string_view(args_str), /*separator=*/'\0', &args);
204 for (std::string_view arg : args) {
205 if (android::base::ConsumePrefix(&arg, "--cache-info-fd=")) { // This is a dex2oat flag.
206 int fd;
207 if (!android::base::ParseInt(std::string(arg), &fd)) {
208 LOG(ERROR) << "Failed to parse --cache-info-fd (value: '" << arg << "')";
209 return -1;
210 }
211 return fd;
212 }
213 }
214 return -1;
215 }
216
217 static std::unordered_map<std::string, std::string> GetCachedProperties() {
218 // For simplicity, we don't handle multiple calls because otherwise we would have to reset the fd.
219 static bool called = false;
220 CHECK(!called) << "GetCachedProperties can be called only once";
221 called = true;
222
223 std::string cache_info_contents;
224 int fd = GetOverrideCacheInfoFd();
225 if (fd >= 0) {
226 if (!android::base::ReadFdToString(fd, &cache_info_contents)) {
227 PLOG(ERROR) << "Failed to read cache-info from fd " << fd;
228 return {};
229 }
230 } else {
231 std::string path = GetApexDataDalvikCacheDirectory(InstructionSet::kNone) + "/cache-info.xml";
232 if (!android::base::ReadFileToString(path, &cache_info_contents)) {
233 // If the file is not found, then we are in chroot or in a standalone runtime process (e.g.,
234 // IncidentHelper), or odsign/odrefresh failed to generate and sign the cache info. There's
235 // nothing we can do.
236 if (errno != ENOENT) {
237 PLOG(ERROR) << "Failed to read cache-info from the default path";
238 }
239 return {};
240 }
241 }
242
243 std::optional<com::android::art::CacheInfo> cache_info =
244 com::android::art::parse(cache_info_contents.c_str());
245 if (!cache_info.has_value()) {
246 // This should never happen.
247 LOG(ERROR) << "Failed to parse cache-info";
248 return {};
249 }
250 const com::android::art::KeyValuePairList* list = cache_info->getFirstSystemProperties();
251 if (list == nullptr) {
252 // This should never happen.
253 LOG(ERROR) << "Missing system properties from cache-info";
254 return {};
255 }
256 const std::vector<com::android::art::KeyValuePair>& properties = list->getItem();
257 std::unordered_map<std::string, std::string> result;
258 for (const com::android::art::KeyValuePair& pair : properties) {
259 result[pair.getK()] = pair.getV();
260 }
261 return result;
262 }
263
264 static bool GetCachedBoolProperty(
265 const std::unordered_map<std::string, std::string>& cached_properties,
266 const std::string& key,
267 bool default_value) {
268 auto it = cached_properties.find(key);
269 if (it == cached_properties.end()) {
270 return default_value;
271 }
272 ParseBoolResult result = ParseBool(it->second);
273 switch (result) {
274 case ParseBoolResult::kTrue:
275 return true;
276 case ParseBoolResult::kFalse:
277 return false;
278 case ParseBoolResult::kError:
279 return default_value;
280 }
281 }
282
283 static bool SysPropSaysUffdGc() {
284 // The phenotype flag can change at any time after boot, but it shouldn't take effect until a
285 // reboot. Therefore, we read the phenotype flag from the cache info, which is generated on boot.
286 std::unordered_map<std::string, std::string> cached_properties = GetCachedProperties();
287 bool phenotype_enable = GetCachedBoolProperty(
288 cached_properties, "persist.device_config.runtime_native_boot.enable_uffd_gc_2", false);
289 bool phenotype_force_disable = GetCachedBoolProperty(
290 cached_properties, "persist.device_config.runtime_native_boot.force_disable_uffd_gc", false);
291 bool build_enable = GetBoolProperty("ro.dalvik.vm.enable_uffd_gc", false);
292 bool is_at_most_u = !IsAtLeastV();
293 return (phenotype_enable || build_enable || is_at_most_u) && !phenotype_force_disable;
294 }
295 #else
296 // Never called.
297 static bool SysPropSaysUffdGc() { return false; }
298 #endif
299
300 static bool ShouldUseUserfaultfd() {
301 static_assert(kUseBakerReadBarrier || kUseTableLookupReadBarrier);
302 #ifdef __linux__
303 // Use CMC/CC if explicitly requested on the cmdline. Otherwise, always use
304 // CC on host. On target, use CMC only if the system properties say so and
305 // the kernel supports it.
306 gc::CollectorType gc_type = FetchCmdlineGcType();
307 return gc_type == gc::CollectorType::kCollectorTypeCMC ||
308 (gc_type == gc::CollectorType::kCollectorTypeNone &&
309 kIsTargetAndroid &&
310 SysPropSaysUffdGc() &&
311 KernelSupportsUffd());
312 #else
313 return false;
314 #endif
315 }
316
317 const bool gUseUserfaultfd = ShouldUseUserfaultfd();
318 const bool gUseReadBarrier = !gUseUserfaultfd;
319 #endif
320
321 namespace gc {
322 namespace collector {
323
324 // Turn off kCheckLocks when profiling the GC as it slows down the GC
325 // significantly.
326 static constexpr bool kCheckLocks = kDebugLocking;
327 static constexpr bool kVerifyRootsMarked = kIsDebugBuild;
328 // Number of compaction buffers reserved for mutator threads in the SIGBUS-feature
329 // case. It's extremely unlikely that we will ever have more than this number
330 // of mutator threads trying to access the moving-space during one compaction
331 // phase.
332 static constexpr size_t kMutatorCompactionBufferCount = 2048;
333 // Minimum from-space chunk to be madvised (during concurrent compaction) in one go.
334 // Choose a reasonable size to avoid making too many batched ioctl and madvise calls.
335 static constexpr ssize_t kMinFromSpaceMadviseSize = 8 * MB;
336 // Concurrent compaction termination logic is different (and slightly more efficient) if the
337 // kernel has the fault-retry feature (allowing repeated faults on the same page), which was
338 // introduced in 5.7 (https://android-review.git.corp.google.com/c/kernel/common/+/1540088).
339 // This allows a single page fault to be handled, in turn, by each worker thread, only waking
340 // up the GC thread at the end.
341 static const bool gKernelHasFaultRetry = IsKernelVersionAtLeast(5, 7);
342
343 std::pair<bool, bool> MarkCompact::GetUffdAndMinorFault() {
344 bool uffd_available;
345 // In most cases gUffdFeatures will already be initialized at boot time when
346 // libart is loaded. On very old kernels we may get '0' from the kernel, in
347 // which case we would be doing the syscalls each time this function is
348 // called. But that's a very unlikely case. There are no correctness issues,
349 // as the kernel's response never changes after boot.
350 if (UNLIKELY(gUffdFeatures == 0)) {
351 uffd_available = KernelSupportsUffd();
352 } else {
353 // We can have any uffd features only if uffd exists.
354 uffd_available = true;
355 }
356 bool minor_fault_available =
357 (gUffdFeatures & kUffdFeaturesForMinorFault) == kUffdFeaturesForMinorFault;
358 return std::pair<bool, bool>(uffd_available, minor_fault_available);
359 }
360
361 bool MarkCompact::CreateUserfaultfd(bool post_fork) {
362 if (post_fork || uffd_ == kFdUnused) {
363 // Check if we have MREMAP_DONTUNMAP here for cases where
364 // 'ART_USE_READ_BARRIER=false' is used. Additionally, this check ensures
365 // that userfaultfd isn't used on old kernels, which cause random ioctl
366 // failures.
367 if (gHaveMremapDontunmap) {
368 // Don't use O_NONBLOCK as we rely on read waiting on uffd_ if there isn't
369 // any read event available. We don't use poll.
370 uffd_ = syscall(__NR_userfaultfd, O_CLOEXEC | UFFD_USER_MODE_ONLY);
371 // On non-android devices we may not have the kernel patches that restrict
372 // userfaultfd to user mode. But that is not a security concern as we are
373 // on host. Therefore, attempt one more time without UFFD_USER_MODE_ONLY.
374 if (!kIsTargetAndroid && UNLIKELY(uffd_ == -1 && errno == EINVAL)) {
375 uffd_ = syscall(__NR_userfaultfd, O_CLOEXEC);
376 }
377 if (UNLIKELY(uffd_ == -1)) {
378 uffd_ = kFallbackMode;
379 LOG(WARNING) << "Userfaultfd isn't supported (reason: " << strerror(errno)
380 << ") and therefore falling back to stop-the-world compaction.";
381 } else {
382 DCHECK(IsValidFd(uffd_));
383 // Initialize uffd with the features which are required and available.
384 // Using private anonymous mapping in threading mode is the default,
385 // for which we don't need to ask for any features. Note: this mode
386 // is not used in production.
387 struct uffdio_api api = {.api = UFFD_API, .features = 0, .ioctls = 0};
388 // We should add SIGBUS feature only if we plan on using it as
389 // requesting it here will mean threading mode will not work.
390 CHECK_EQ(gUffdFeatures & kUffdFeaturesForSigbus, kUffdFeaturesForSigbus);
391 api.features |= kUffdFeaturesForSigbus;
392 CHECK_EQ(ioctl(uffd_, UFFDIO_API, &api), 0)
393 << "ioctl_userfaultfd: API: " << strerror(errno);
394 }
395 } else {
396 uffd_ = kFallbackMode;
397 }
398 }
399 uffd_initialized_ = !post_fork || uffd_ == kFallbackMode;
400 return IsValidFd(uffd_);
401 }
402
403 template <size_t kAlignment>
404 MarkCompact::LiveWordsBitmap<kAlignment>* MarkCompact::LiveWordsBitmap<kAlignment>::Create(
405 uintptr_t begin, uintptr_t end) {
406 return static_cast<LiveWordsBitmap<kAlignment>*>(
407 MemRangeBitmap::Create("Concurrent Mark Compact live words bitmap", begin, end));
408 }
409
410 size_t MarkCompact::ComputeInfoMapSize() {
411 size_t moving_space_size = bump_pointer_space_->Capacity();
412 size_t chunk_info_vec_size = moving_space_size / kOffsetChunkSize;
413 size_t nr_moving_pages = DivideByPageSize(moving_space_size);
414 size_t nr_non_moving_pages = DivideByPageSize(heap_->GetNonMovingSpace()->Capacity());
415 return chunk_info_vec_size * sizeof(uint32_t) + nr_non_moving_pages * sizeof(ObjReference) +
416 nr_moving_pages * (sizeof(ObjReference) + sizeof(uint32_t) + sizeof(Atomic<uint32_t>));
417 }
418
419 size_t MarkCompact::InitializeInfoMap(uint8_t* p, size_t moving_space_sz) {
420 size_t nr_moving_pages = DivideByPageSize(moving_space_sz);
421
422 chunk_info_vec_ = reinterpret_cast<uint32_t*>(p);
423 vector_length_ = moving_space_sz / kOffsetChunkSize;
424 size_t total = vector_length_ * sizeof(uint32_t);
425
426 first_objs_moving_space_ = reinterpret_cast<ObjReference*>(p + total);
427 total += nr_moving_pages * sizeof(ObjReference);
428
429 pre_compact_offset_moving_space_ = reinterpret_cast<uint32_t*>(p + total);
430 total += nr_moving_pages * sizeof(uint32_t);
431
432 moving_pages_status_ = reinterpret_cast<Atomic<uint32_t>*>(p + total);
433 total += nr_moving_pages * sizeof(Atomic<uint32_t>);
434
435 first_objs_non_moving_space_ = reinterpret_cast<ObjReference*>(p + total);
436 total += DivideByPageSize(heap_->GetNonMovingSpace()->Capacity()) * sizeof(ObjReference);
437 DCHECK_EQ(total, ComputeInfoMapSize());
438 return total;
439 }
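// For reference, the resulting layout of the single info map (sizes as computed
// in ComputeInfoMapSize() above):
//
//   chunk_info_vec_                  : vector_length_      * sizeof(uint32_t)
//   first_objs_moving_space_         : nr_moving_pages     * sizeof(ObjReference)
//   pre_compact_offset_moving_space_ : nr_moving_pages     * sizeof(uint32_t)
//   moving_pages_status_             : nr_moving_pages     * sizeof(Atomic<uint32_t>)
//   first_objs_non_moving_space_     : nr_non_moving_pages * sizeof(ObjReference)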
440
441 MarkCompact::MarkCompact(Heap* heap)
442 : GarbageCollector(heap, "concurrent mark compact"),
443 gc_barrier_(0),
444 lock_("mark compact lock", kGenericBottomLock),
445 bump_pointer_space_(heap->GetBumpPointerSpace()),
446 moving_space_bitmap_(bump_pointer_space_->GetMarkBitmap()),
447 moving_space_begin_(bump_pointer_space_->Begin()),
448 moving_space_end_(bump_pointer_space_->Limit()),
449 black_dense_end_(moving_space_begin_),
450 uffd_(kFdUnused),
451 sigbus_in_progress_count_{kSigbusCounterCompactionDoneMask, kSigbusCounterCompactionDoneMask},
452 compacting_(false),
453 marking_done_(false),
454 uffd_initialized_(false),
455 clamp_info_map_status_(ClampInfoStatus::kClampInfoNotDone) {
456 if (kIsDebugBuild) {
457 updated_roots_.reset(new std::unordered_set<void*>());
458 }
459 if (gUffdFeatures == 0) {
460 GetUffdAndMinorFault();
461 }
462 uint8_t* moving_space_begin = bump_pointer_space_->Begin();
463 // TODO: This depends on how the bump-pointer space move is implemented. If we
464 // switch between two virtual memories each time, then we will have to
465 // initialize live_words_bitmap_ accordingly.
466 live_words_bitmap_.reset(LiveWordsBitmap<kAlignment>::Create(
467 reinterpret_cast<uintptr_t>(moving_space_begin),
468 reinterpret_cast<uintptr_t>(bump_pointer_space_->Limit())));
469
470 std::string err_msg;
471 size_t moving_space_size = bump_pointer_space_->Capacity();
472 {
473 // Create one MemMap for all the data structures
474 info_map_ = MemMap::MapAnonymous("Concurrent mark-compact chunk-info vector",
475 ComputeInfoMapSize(),
476 PROT_READ | PROT_WRITE,
477 /*low_4gb=*/false,
478 &err_msg);
479 if (UNLIKELY(!info_map_.IsValid())) {
480 LOG(FATAL) << "Failed to allocate concurrent mark-compact chunk-info vector: " << err_msg;
481 } else {
482 size_t total = InitializeInfoMap(info_map_.Begin(), moving_space_size);
483 DCHECK_EQ(total, info_map_.Size());
484 }
485 }
486
487 size_t moving_space_alignment = Heap::BestPageTableAlignment(moving_space_size);
488 // The moving space is created at a fixed address, which is expected to be
489 // PMD-size aligned.
490 if (!IsAlignedParam(moving_space_begin, moving_space_alignment)) {
491 LOG(WARNING) << "Bump pointer space is not aligned to " << PrettySize(moving_space_alignment)
492 << ". This can lead to longer stop-the-world pauses for compaction";
493 }
494 // NOTE: PROT_NONE is used here as these mappings are for address space reservation
495 // only and will be used only after appropriately remapping them.
496 from_space_map_ = MemMap::MapAnonymousAligned("Concurrent mark-compact from-space",
497 moving_space_size,
498 PROT_NONE,
499 /*low_4gb=*/kObjPtrPoisoning,
500 moving_space_alignment,
501 &err_msg);
502 if (UNLIKELY(!from_space_map_.IsValid())) {
503 LOG(FATAL) << "Failed to allocate concurrent mark-compact from-space" << err_msg;
504 } else {
505 from_space_begin_ = from_space_map_.Begin();
506 }
507
508 compaction_buffers_map_ = MemMap::MapAnonymous("Concurrent mark-compact compaction buffers",
509 (1 + kMutatorCompactionBufferCount) * gPageSize,
510 PROT_READ | PROT_WRITE,
511 /*low_4gb=*/kObjPtrPoisoning,
512 &err_msg);
513 if (UNLIKELY(!compaction_buffers_map_.IsValid())) {
514 LOG(FATAL) << "Failed to allocate concurrent mark-compact compaction buffers" << err_msg;
515 }
516 // We also use the first page-sized buffer for the purpose of terminating concurrent compaction.
517 conc_compaction_termination_page_ = compaction_buffers_map_.Begin();
518 // Touch the page deliberately to avoid userfaults on it. We madvise it in
519 // CompactionPhase() before using it to terminate concurrent compaction.
520 ForceRead(conc_compaction_termination_page_);
521
522 // In most of the cases, we don't expect more than one LinearAlloc space.
523 linear_alloc_spaces_data_.reserve(1);
524
525 // Initialize GC metrics.
526 metrics::ArtMetrics* metrics = GetMetrics();
527 // The mark-compact collector supports only full-heap collections at the moment.
528 gc_time_histogram_ = metrics->FullGcCollectionTime();
529 metrics_gc_count_ = metrics->FullGcCount();
530 metrics_gc_count_delta_ = metrics->FullGcCountDelta();
531 gc_throughput_histogram_ = metrics->FullGcThroughput();
532 gc_tracing_throughput_hist_ = metrics->FullGcTracingThroughput();
533 gc_throughput_avg_ = metrics->FullGcThroughputAvg();
534 gc_tracing_throughput_avg_ = metrics->FullGcTracingThroughputAvg();
535 gc_scanned_bytes_ = metrics->FullGcScannedBytes();
536 gc_scanned_bytes_delta_ = metrics->FullGcScannedBytesDelta();
537 gc_freed_bytes_ = metrics->FullGcFreedBytes();
538 gc_freed_bytes_delta_ = metrics->FullGcFreedBytesDelta();
539 gc_duration_ = metrics->FullGcDuration();
540 gc_duration_delta_ = metrics->FullGcDurationDelta();
541 are_metrics_initialized_ = true;
542 }
543
544 void MarkCompact::AddLinearAllocSpaceData(uint8_t* begin, size_t len) {
545 DCHECK_ALIGNED_PARAM(begin, gPageSize);
546 DCHECK_ALIGNED_PARAM(len, gPageSize);
547 DCHECK_GE(len, Heap::GetPMDSize());
548 size_t alignment = Heap::BestPageTableAlignment(len);
549 std::string err_msg;
550 MemMap shadow(MemMap::MapAnonymousAligned("linear-alloc shadow map",
551 len,
552 PROT_NONE,
553 /*low_4gb=*/false,
554 alignment,
555 &err_msg));
556 if (!shadow.IsValid()) {
557 LOG(FATAL) << "Failed to allocate linear-alloc shadow map: " << err_msg;
558 UNREACHABLE();
559 }
560
561 MemMap page_status_map(MemMap::MapAnonymous("linear-alloc page-status map",
562 DivideByPageSize(len),
563 PROT_READ | PROT_WRITE,
564 /*low_4gb=*/false,
565 &err_msg));
566 if (!page_status_map.IsValid()) {
567 LOG(FATAL) << "Failed to allocate linear-alloc page-status shadow map: " << err_msg;
568 UNREACHABLE();
569 }
570 linear_alloc_spaces_data_.emplace_back(
571 std::forward<MemMap>(shadow), std::forward<MemMap>(page_status_map), begin, begin + len);
572 }
573
574 void MarkCompact::ClampGrowthLimit(size_t new_capacity) {
575 // From-space is the same size as moving-space in virtual memory.
576 // However, if it's in the >4GB address space then we don't need to clamp it
577 // synchronously.
578 #if defined(__LP64__)
579 constexpr bool kClampFromSpace = kObjPtrPoisoning;
580 #else
581 constexpr bool kClampFromSpace = true;
582 #endif
583 size_t old_capacity = bump_pointer_space_->Capacity();
584 new_capacity = bump_pointer_space_->ClampGrowthLimit(new_capacity);
585 if (new_capacity < old_capacity) {
586 CHECK(from_space_map_.IsValid());
587 if (kClampFromSpace) {
588 from_space_map_.SetSize(new_capacity);
589 }
590 clamp_info_map_status_ = ClampInfoStatus::kClampInfoPending;
591 }
592 CHECK_EQ(moving_space_begin_, bump_pointer_space_->Begin());
593 }
594
595 void MarkCompact::MaybeClampGcStructures() {
596 size_t moving_space_size = bump_pointer_space_->Capacity();
597 DCHECK(thread_running_gc_ != nullptr);
598 if (UNLIKELY(clamp_info_map_status_ == ClampInfoStatus::kClampInfoPending)) {
599 CHECK(from_space_map_.IsValid());
600 if (from_space_map_.Size() > moving_space_size) {
601 from_space_map_.SetSize(moving_space_size);
602 }
603 // Bitmaps and other data structures
604 live_words_bitmap_->SetBitmapSize(moving_space_size);
605 size_t set_size = InitializeInfoMap(info_map_.Begin(), moving_space_size);
606 CHECK_LT(set_size, info_map_.Size());
607 info_map_.SetSize(set_size);
608
609 clamp_info_map_status_ = ClampInfoStatus::kClampInfoFinished;
610 }
611 }
612
613 void MarkCompact::PrepareCardTableForMarking(bool clear_alloc_space_cards) {
614 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
615 accounting::CardTable* const card_table = heap_->GetCardTable();
616 // immune_spaces_ is emptied in InitializePhase() before marking starts. This
617 // function is invoked twice during marking. We only need to populate immune_spaces_
618 // once per GC cycle. And when it's done (below), all the immune spaces are
619 // added to it. We can never have partially filled immune_spaces_.
620 bool update_immune_spaces = immune_spaces_.IsEmpty();
621 // Mark all of the spaces we never collect as immune.
622 for (const auto& space : GetHeap()->GetContinuousSpaces()) {
623 if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect ||
624 space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
625 CHECK(space->IsZygoteSpace() || space->IsImageSpace());
626 if (update_immune_spaces) {
627 immune_spaces_.AddSpace(space);
628 }
629 accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
630 if (table != nullptr) {
631 table->ProcessCards();
632 } else {
633 // Keep cards aged if we don't have a mod-union table since we need
634 // to scan them in future GCs. This case is for app images.
635 card_table->ModifyCardsAtomic(
636 space->Begin(),
637 space->End(),
638 [](uint8_t card) {
639 return (card == gc::accounting::CardTable::kCardClean)
640 ? card
641 : gc::accounting::CardTable::kCardAged;
642 },
643 /* card modified visitor */ VoidFunctor());
644 }
645 } else if (clear_alloc_space_cards) {
646 CHECK(!space->IsZygoteSpace());
647 CHECK(!space->IsImageSpace());
648 // The card-table corresponding to bump-pointer and non-moving space can
649 // be cleared, because we are going to traverse all the reachable objects
650 // in these spaces. This card-table will eventually be used to track
651 // mutations while concurrent marking is going on.
652 card_table->ClearCardRange(space->Begin(), space->Limit());
653 if (space != bump_pointer_space_) {
654 CHECK_EQ(space, heap_->GetNonMovingSpace());
655 non_moving_space_ = space;
656 non_moving_space_bitmap_ = space->GetMarkBitmap();
657 }
658 } else {
659 card_table->ModifyCardsAtomic(
660 space->Begin(),
661 space->End(),
662 [](uint8_t card) {
663 return (card == gc::accounting::CardTable::kCardDirty) ?
664 gc::accounting::CardTable::kCardAged :
665 gc::accounting::CardTable::kCardClean;
666 },
667 /* card modified visitor */ VoidFunctor());
668 }
669 }
670 }
671
672 void MarkCompact::MarkZygoteLargeObjects() {
673 Thread* self = thread_running_gc_;
674 DCHECK_EQ(self, Thread::Current());
675 space::LargeObjectSpace* const los = heap_->GetLargeObjectsSpace();
676 if (los != nullptr) {
677 // Pick the current live bitmap (mark bitmap if swapped).
678 accounting::LargeObjectBitmap* const live_bitmap = los->GetLiveBitmap();
679 accounting::LargeObjectBitmap* const mark_bitmap = los->GetMarkBitmap();
680 // Walk through all of the objects and explicitly mark the zygote ones so they don't get swept.
681 std::pair<uint8_t*, uint8_t*> range = los->GetBeginEndAtomic();
682 live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(range.first),
683 reinterpret_cast<uintptr_t>(range.second),
684 [mark_bitmap, los, self](mirror::Object* obj)
685 REQUIRES(Locks::heap_bitmap_lock_)
686 REQUIRES_SHARED(Locks::mutator_lock_) {
687 if (los->IsZygoteLargeObject(self, obj)) {
688 mark_bitmap->Set(obj);
689 }
690 });
691 }
692 }
693
694 void MarkCompact::InitializePhase() {
695 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
696 mark_stack_ = heap_->GetMarkStack();
697 CHECK(mark_stack_->IsEmpty());
698 immune_spaces_.Reset();
699 moving_first_objs_count_ = 0;
700 non_moving_first_objs_count_ = 0;
701 black_page_count_ = 0;
702 bytes_scanned_ = 0;
703 freed_objects_ = 0;
704 // The first buffer is used by gc-thread.
705 compaction_buffer_counter_.store(1, std::memory_order_relaxed);
706 black_allocations_begin_ = bump_pointer_space_->Limit();
707 DCHECK_EQ(moving_space_begin_, bump_pointer_space_->Begin());
708 from_space_slide_diff_ = from_space_begin_ - moving_space_begin_;
709 moving_space_end_ = bump_pointer_space_->Limit();
710 if (black_dense_end_ > moving_space_begin_) {
711 moving_space_bitmap_->Clear();
712 }
713 black_dense_end_ = moving_space_begin_;
714 // TODO: Would it suffice to read it once in the constructor, which is called
715 // in zygote process?
716 pointer_size_ = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
717 }
718
719 class MarkCompact::ThreadFlipVisitor : public Closure {
720 public:
721 explicit ThreadFlipVisitor(MarkCompact* collector) : collector_(collector) {}
722
723 void Run(Thread* thread) override REQUIRES_SHARED(Locks::mutator_lock_) {
724 // Note: self is not necessarily equal to thread since thread may be suspended.
725 Thread* self = Thread::Current();
726 CHECK(thread == self || thread->GetState() != ThreadState::kRunnable)
727 << thread->GetState() << " thread " << thread << " self " << self;
728 thread->VisitRoots(collector_, kVisitRootFlagAllRoots);
729 // Interpreter cache is thread-local so it needs to be swept either in a
730 // flip, or a stop-the-world pause.
731 CHECK(collector_->compacting_);
732 thread->SweepInterpreterCache(collector_);
733 thread->AdjustTlab(collector_->black_objs_slide_diff_);
734 }
735
736 private:
737 MarkCompact* const collector_;
738 };
739
740 class MarkCompact::FlipCallback : public Closure {
741 public:
742 explicit FlipCallback(MarkCompact* collector) : collector_(collector) {}
743
744 void Run([[maybe_unused]] Thread* thread) override REQUIRES(Locks::mutator_lock_) {
745 collector_->CompactionPause();
746 }
747
748 private:
749 MarkCompact* const collector_;
750 };
751
752 void MarkCompact::RunPhases() {
753 Thread* self = Thread::Current();
754 thread_running_gc_ = self;
755 Runtime* runtime = Runtime::Current();
756 InitializePhase();
757 GetHeap()->PreGcVerification(this);
758 {
759 ReaderMutexLock mu(self, *Locks::mutator_lock_);
760 MarkingPhase();
761 }
762 {
763 // Marking pause
764 ScopedPause pause(this);
765 MarkingPause();
766 if (kIsDebugBuild) {
767 bump_pointer_space_->AssertAllThreadLocalBuffersAreRevoked();
768 }
769 }
770 bool perform_compaction;
771 {
772 ReaderMutexLock mu(self, *Locks::mutator_lock_);
773 ReclaimPhase();
774 perform_compaction = PrepareForCompaction();
775 }
776
777 if (perform_compaction) {
778 // Compaction pause
779 ThreadFlipVisitor visitor(this);
780 FlipCallback callback(this);
781 runtime->GetThreadList()->FlipThreadRoots(
782 &visitor, &callback, this, GetHeap()->GetGcPauseListener());
783
784 if (IsValidFd(uffd_)) {
785 ReaderMutexLock mu(self, *Locks::mutator_lock_);
786 CompactionPhase();
787 }
788 }
789 FinishPhase();
790 GetHeap()->PostGcVerification(this);
791 thread_running_gc_ = nullptr;
792 }
793
794 void MarkCompact::InitMovingSpaceFirstObjects(size_t vec_len, size_t to_space_page_idx) {
795 uint32_t offset_in_chunk_word;
796 uint32_t offset;
797 mirror::Object* obj;
798 const uintptr_t heap_begin = moving_space_bitmap_->HeapBegin();
799
800 // Find the first live word.
801 size_t chunk_idx = to_space_page_idx * (gPageSize / kOffsetChunkSize);
802 DCHECK_LT(chunk_idx, vec_len);
803 // Find the first live word in the space
804 for (; chunk_info_vec_[chunk_idx] == 0; chunk_idx++) {
805 if (chunk_idx >= vec_len) {
806 // We don't have any live data on the moving-space.
807 moving_first_objs_count_ = to_space_page_idx;
808 return;
809 }
810 }
811 DCHECK_LT(chunk_idx, vec_len);
812 // Use live-words bitmap to find the first live word
813 offset_in_chunk_word = live_words_bitmap_->FindNthLiveWordOffset(chunk_idx, /*n*/ 0);
814 offset = chunk_idx * kBitsPerVectorWord + offset_in_chunk_word;
815 DCHECK(live_words_bitmap_->Test(offset)) << "offset=" << offset
816 << " chunk_idx=" << chunk_idx
817 << " N=0"
818 << " offset_in_word=" << offset_in_chunk_word
819 << " word=" << std::hex
820 << live_words_bitmap_->GetWord(chunk_idx);
821 obj = moving_space_bitmap_->FindPrecedingObject(heap_begin + offset * kAlignment);
822 // TODO: add a check to validate the object.
823
824 pre_compact_offset_moving_space_[to_space_page_idx] = offset;
825 first_objs_moving_space_[to_space_page_idx].Assign(obj);
826 to_space_page_idx++;
827
828 uint32_t page_live_bytes = 0;
829 while (true) {
830 for (; page_live_bytes <= gPageSize; chunk_idx++) {
831 if (chunk_idx >= vec_len) {
832 moving_first_objs_count_ = to_space_page_idx;
833 return;
834 }
835 page_live_bytes += chunk_info_vec_[chunk_idx];
836 }
837 chunk_idx--;
838 page_live_bytes -= gPageSize;
839 DCHECK_LE(page_live_bytes, kOffsetChunkSize);
840 DCHECK_LE(page_live_bytes, chunk_info_vec_[chunk_idx])
841 << " chunk_idx=" << chunk_idx
842 << " to_space_page_idx=" << to_space_page_idx
843 << " vec_len=" << vec_len;
844 DCHECK(IsAligned<kAlignment>(chunk_info_vec_[chunk_idx] - page_live_bytes));
845 offset_in_chunk_word =
846 live_words_bitmap_->FindNthLiveWordOffset(
847 chunk_idx, (chunk_info_vec_[chunk_idx] - page_live_bytes) / kAlignment);
848 offset = chunk_idx * kBitsPerVectorWord + offset_in_chunk_word;
849 DCHECK(live_words_bitmap_->Test(offset))
850 << "offset=" << offset
851 << " chunk_idx=" << chunk_idx
852 << " N=" << ((chunk_info_vec_[chunk_idx] - page_live_bytes) / kAlignment)
853 << " offset_in_word=" << offset_in_chunk_word
854 << " word=" << std::hex << live_words_bitmap_->GetWord(chunk_idx);
855 // TODO: Can we optimize this for large objects? If we are continuing a
856 // large object that spans multiple pages, then we may be able to do without
857 // calling FindPrecedingObject().
858 //
859 // Find the object which encapsulates offset in it, which could be
860 // starting at offset itself.
861 obj = moving_space_bitmap_->FindPrecedingObject(heap_begin + offset * kAlignment);
862 // TODO: add a check to validate the object.
863 pre_compact_offset_moving_space_[to_space_page_idx] = offset;
864 first_objs_moving_space_[to_space_page_idx].Assign(obj);
865 to_space_page_idx++;
866 chunk_idx++;
867 }
868 }
869
870 size_t MarkCompact::InitNonMovingFirstObjects(uintptr_t begin,
871 uintptr_t end,
872 accounting::ContinuousSpaceBitmap* bitmap,
873 ObjReference* first_objs_arr) {
874 mirror::Object* prev_obj;
875 size_t page_idx;
876 {
877 // Find first live object
878 mirror::Object* obj = nullptr;
879 bitmap->VisitMarkedRange</*kVisitOnce*/ true>(begin,
880 end,
881 [&obj] (mirror::Object* o) {
882 obj = o;
883 });
884 if (obj == nullptr) {
885 // There are no live objects in the space
886 return 0;
887 }
888 page_idx = DivideByPageSize(reinterpret_cast<uintptr_t>(obj) - begin);
889 first_objs_arr[page_idx++].Assign(obj);
890 prev_obj = obj;
891 }
892 // TODO: check obj is valid
893 uintptr_t prev_obj_end = reinterpret_cast<uintptr_t>(prev_obj)
894 + RoundUp(prev_obj->SizeOf<kDefaultVerifyFlags>(), kAlignment);
895 // For every page find the object starting from which we need to call
896 // VisitReferences. It could either be an object that started on some
897 // preceding page, or some object starting within this page.
898 begin = RoundDown(reinterpret_cast<uintptr_t>(prev_obj) + gPageSize, gPageSize);
899 while (begin < end) {
900 // Utilize a large object, if any, that started on some preceding page but
901 // overlaps with this page as well.
902 if (prev_obj != nullptr && prev_obj_end > begin) {
903 DCHECK_LT(prev_obj, reinterpret_cast<mirror::Object*>(begin));
904 first_objs_arr[page_idx].Assign(prev_obj);
905 } else {
906 prev_obj_end = 0;
907 // It's sufficient to search for the previous object only in the preceding page:
908 // if no live object started in that page, then an object from an even earlier
909 // page that is big enough to overlap the current page would also overlap the
910 // preceding page, in which case we wouldn't be in this else branch.
911 prev_obj = bitmap->FindPrecedingObject(begin, begin - gPageSize);
912 if (prev_obj != nullptr) {
913 prev_obj_end = reinterpret_cast<uintptr_t>(prev_obj)
914 + RoundUp(prev_obj->SizeOf<kDefaultVerifyFlags>(), kAlignment);
915 }
916 if (prev_obj_end > begin) {
917 first_objs_arr[page_idx].Assign(prev_obj);
918 } else {
919 // Find the first live object in this page
920 bitmap->VisitMarkedRange</*kVisitOnce*/ true>(
921 begin, begin + gPageSize, [first_objs_arr, page_idx](mirror::Object* obj) {
922 first_objs_arr[page_idx].Assign(obj);
923 });
924 }
925 // An empty entry indicates that the page has no live objects and hence
926 // can be skipped.
927 }
928 begin += gPageSize;
929 page_idx++;
930 }
931 return page_idx;
932 }
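// An illustration with hypothetical objects: if object X starts on page P0 and
// spills over P1 and P2, object A precedes X on P0, and object B is the first
// object starting on P3, then the array ends up as
//   first_objs_arr[P0] = A, first_objs_arr[P1] = X,
//   first_objs_arr[P2] = X, first_objs_arr[P3] = B,
// i.e. every entry names the object from which reference-visiting must start
// for that page.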
933
934 bool MarkCompact::PrepareForCompaction() {
935 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
936 size_t chunk_info_per_page = gPageSize / kOffsetChunkSize;
937 size_t vector_len = (black_allocations_begin_ - moving_space_begin_) / kOffsetChunkSize;
938 DCHECK_LE(vector_len, vector_length_);
939 DCHECK_ALIGNED_PARAM(vector_length_, chunk_info_per_page);
940 if (UNLIKELY(vector_len == 0)) {
941 // Nothing to compact.
942 return false;
943 }
944 for (size_t i = 0; i < vector_len; i++) {
945 DCHECK_LE(chunk_info_vec_[i], kOffsetChunkSize);
946 DCHECK_EQ(chunk_info_vec_[i], live_words_bitmap_->LiveBytesInBitmapWord(i));
947 }
948
949 // TODO: We can do a lot of neat tricks with this offset vector to tune the
950 // compaction as we wish. Originally, the compaction algorithm slides all
951 // live objects towards the beginning of the heap. This is nice because it
952 // keeps the spatial locality of objects intact.
953 // However, sometimes it's desired to compact objects in certain portions
954 // of the heap. For instance, it is expected that, over time,
955 // objects towards the beginning of the heap are long lived and are always
956 // densely packed. In this case, it makes sense to only update references in
957 // there and not try to compact it.
958 // Furthermore, we might have some large objects and may not want to move such
959 // objects.
960 // We can adjust, without too much effort, the values in the chunk_info_vec_ such
961 // that the objects in the dense beginning area aren't moved. OTOH, large
962 // objects, which could be anywhere in the heap, could also be kept from
963 // moving by using a similar trick. The only issue is that by doing this we will
964 // leave an unused hole in the middle of the heap which can't be used for
965 // allocations until we do a *full* compaction.
966 //
967 // At this point every element in the chunk_info_vec_ contains the live-bytes
968 // of the corresponding chunk. For old-to-new address computation we need
969 // every element to reflect total live-bytes till the corresponding chunk.
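// As a worked example (hypothetical values): if black_dense_idx is 0 and the
// first four entries hold per-chunk live bytes {40, 0, 24, 16}, then the
// std::exclusive_scan() below turns them into cumulative offsets {0, 40, 40, 64};
// adding back the value saved from the last element before the scan (16, in the
// case where the vector isn't extended) gives the 80 total live bytes used to
// compute post_compact_end_.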
970
971 size_t black_dense_idx = 0;
972 GcCause gc_cause = GetCurrentIteration()->GetGcCause();
973 if (gc_cause != kGcCauseExplicit && gc_cause != kGcCauseCollectorTransition &&
974 !GetCurrentIteration()->GetClearSoftReferences()) {
975 uint64_t live_bytes = 0, total_bytes = 0;
976 size_t aligned_vec_len = RoundUp(vector_len, chunk_info_per_page);
977 size_t num_pages = aligned_vec_len / chunk_info_per_page;
978 size_t threshold_passing_marker = 0; // In number of pages
979 std::vector<uint32_t> pages_live_bytes;
980 pages_live_bytes.reserve(num_pages);
981 // Identify the largest chunk towards the beginning of moving space which
982 // passes the black-dense threshold.
983 for (size_t i = 0; i < aligned_vec_len; i += chunk_info_per_page) {
984 uint32_t page_live_bytes = 0;
985 for (size_t j = 0; j < chunk_info_per_page; j++) {
986 page_live_bytes += chunk_info_vec_[i + j];
987 total_bytes += kOffsetChunkSize;
988 }
989 live_bytes += page_live_bytes;
990 pages_live_bytes.push_back(page_live_bytes);
991 if (live_bytes * 100U >= total_bytes * kBlackDenseRegionThreshold) {
992 threshold_passing_marker = pages_live_bytes.size();
993 }
994 }
995 DCHECK_EQ(pages_live_bytes.size(), num_pages);
996 // Eliminate the pages at the end of the chunk which are lower than the threshold.
997 if (threshold_passing_marker > 0) {
998 auto iter = std::find_if(
999 pages_live_bytes.rbegin() + (num_pages - threshold_passing_marker),
1000 pages_live_bytes.rend(),
1001 [](uint32_t bytes) { return bytes * 100U >= gPageSize * kBlackDenseRegionThreshold; });
1002 black_dense_idx = (pages_live_bytes.rend() - iter) * chunk_info_per_page;
1003 }
1004 black_dense_end_ = moving_space_begin_ + black_dense_idx * kOffsetChunkSize;
1005 DCHECK_ALIGNED_PARAM(black_dense_end_, gPageSize);
1006
1007 // Adjust for class allocated after black_dense_end_ while its object(s)
1008 // are earlier. This is required as we update the references in the
1009 // black-dense region in-place. And if the class pointer of some first
1010 // object for a page, which started in some preceding page, is already
1011 // updated, then we will read wrong class data like ref-offset bitmap.
1012 for (auto iter = class_after_obj_map_.rbegin();
1013 iter != class_after_obj_map_.rend() &&
1014 reinterpret_cast<uint8_t*>(iter->first.AsMirrorPtr()) >= black_dense_end_;
1015 iter++) {
1016 black_dense_end_ =
1017 std::min(black_dense_end_, reinterpret_cast<uint8_t*>(iter->second.AsMirrorPtr()));
1018 black_dense_end_ = AlignDown(black_dense_end_, gPageSize);
1019 }
1020 black_dense_idx = (black_dense_end_ - moving_space_begin_) / kOffsetChunkSize;
1021 DCHECK_LE(black_dense_idx, vector_len);
1022 if (black_dense_idx == vector_len) {
1023 // There is nothing to compact.
1024 return false;
1025 }
1026 InitNonMovingFirstObjects(reinterpret_cast<uintptr_t>(moving_space_begin_),
1027 reinterpret_cast<uintptr_t>(black_dense_end_),
1028 moving_space_bitmap_,
1029 first_objs_moving_space_);
1030 }
1031
1032 InitMovingSpaceFirstObjects(vector_len, black_dense_idx / chunk_info_per_page);
1033 non_moving_first_objs_count_ =
1034 InitNonMovingFirstObjects(reinterpret_cast<uintptr_t>(non_moving_space_->Begin()),
1035 reinterpret_cast<uintptr_t>(non_moving_space_->End()),
1036 non_moving_space_->GetLiveBitmap(),
1037 first_objs_non_moving_space_);
1038 // Update the vector one past the heap usage as it is required for black
1039 // allocated objects' post-compact address computation.
1040 uint32_t total_bytes;
1041 if (vector_len < vector_length_) {
1042 vector_len++;
1043 total_bytes = 0;
1044 } else {
1045 // Fetch the value stored in the last element before it gets overwritten by
1046 // std::exclusive_scan().
1047 total_bytes = chunk_info_vec_[vector_len - 1];
1048 }
1049 std::exclusive_scan(chunk_info_vec_ + black_dense_idx,
1050 chunk_info_vec_ + vector_len,
1051 chunk_info_vec_ + black_dense_idx,
1052 black_dense_idx * kOffsetChunkSize);
1053 total_bytes += chunk_info_vec_[vector_len - 1];
1054 post_compact_end_ = AlignUp(moving_space_begin_ + total_bytes, gPageSize);
1055 CHECK_EQ(post_compact_end_, moving_space_begin_ + moving_first_objs_count_ * gPageSize)
1056 << "moving_first_objs_count_:" << moving_first_objs_count_
1057 << " black_dense_idx:" << black_dense_idx << " vector_len:" << vector_len
1058 << " total_bytes:" << total_bytes
1059 << " black_dense_end:" << reinterpret_cast<void*>(black_dense_end_)
1060 << " chunk_info_per_page:" << chunk_info_per_page;
1061 black_objs_slide_diff_ = black_allocations_begin_ - post_compact_end_;
1062 // We shouldn't be consuming more space after compaction than pre-compaction.
1063 CHECK_GE(black_objs_slide_diff_, 0);
1064 if (black_objs_slide_diff_ == 0) {
1065 black_dense_end_ = black_allocations_begin_;
1066 return false;
1067 }
1068 for (size_t i = vector_len; i < vector_length_; i++) {
1069 DCHECK_EQ(chunk_info_vec_[i], 0u);
1070 }
1071
1072 // How do we handle compaction of heap portion used for allocations after the
1073 // marking-pause?
1074 // All allocations after the marking-pause are considered black (reachable)
1075 // for this GC cycle. However, they need not be allocated contiguously as
1076 // different mutators use TLABs. So we will compact the heap till the point
1077 // where allocations took place before the marking-pause. And everything after
1078 // that will be slid with TLAB holes, and then TLAB info in TLS will be
1079 // appropriately updated in the pre-compaction pause.
1080 // The chunk-info vector entries for the post marking-pause allocations will be
1081 // also updated in the pre-compaction pause.
1082
1083 if (!uffd_initialized_) {
1084 CreateUserfaultfd(/*post_fork=*/false);
1085 }
1086 return true;
1087 }
1088
1089 class MarkCompact::VerifyRootMarkedVisitor : public SingleRootVisitor {
1090 public:
1091 explicit VerifyRootMarkedVisitor(MarkCompact* collector) : collector_(collector) { }
1092
1093 void VisitRoot(mirror::Object* root, const RootInfo& info) override
1094 REQUIRES_SHARED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
1095 CHECK(collector_->IsMarked(root) != nullptr) << info.ToString();
1096 }
1097
1098 private:
1099 MarkCompact* const collector_;
1100 };
1101
1102 void MarkCompact::ReMarkRoots(Runtime* runtime) {
1103 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
1104 DCHECK_EQ(thread_running_gc_, Thread::Current());
1105 Locks::mutator_lock_->AssertExclusiveHeld(thread_running_gc_);
1106 MarkNonThreadRoots(runtime);
1107 MarkConcurrentRoots(static_cast<VisitRootFlags>(kVisitRootFlagNewRoots
1108 | kVisitRootFlagStopLoggingNewRoots
1109 | kVisitRootFlagClearRootLog),
1110 runtime);
1111
1112 if (kVerifyRootsMarked) {
1113 TimingLogger::ScopedTiming t2("(Paused)VerifyRoots", GetTimings());
1114 VerifyRootMarkedVisitor visitor(this);
1115 runtime->VisitRoots(&visitor);
1116 }
1117 }
1118
1119 void MarkCompact::MarkingPause() {
1120 TimingLogger::ScopedTiming t("(Paused)MarkingPause", GetTimings());
1121 Runtime* runtime = Runtime::Current();
1122 Locks::mutator_lock_->AssertExclusiveHeld(thread_running_gc_);
1123 {
1124 // Handle the dirty objects as we are a concurrent GC
1125 WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_);
1126 {
1127 MutexLock mu2(thread_running_gc_, *Locks::runtime_shutdown_lock_);
1128 MutexLock mu3(thread_running_gc_, *Locks::thread_list_lock_);
1129 std::list<Thread*> thread_list = runtime->GetThreadList()->GetList();
1130 for (Thread* thread : thread_list) {
1131 thread->VisitRoots(this, static_cast<VisitRootFlags>(0));
1132 DCHECK_EQ(thread->GetThreadLocalGcBuffer(), nullptr);
1133 // Need to revoke all the thread-local allocation stacks since we will
1134 // swap the allocation stacks (below) and don't want anybody to allocate
1135 // into the live stack.
1136 thread->RevokeThreadLocalAllocationStack();
1137 bump_pointer_space_->RevokeThreadLocalBuffers(thread);
1138 }
1139 }
1140 // Fetch only the accumulated objects-allocated count as it is guaranteed to
1141 // be up-to-date after the TLAB revocation above.
1142 freed_objects_ += bump_pointer_space_->GetAccumulatedObjectsAllocated();
1143 // Capture 'end' of moving-space at this point. Every allocation beyond this
1144 // point will be considered as black.
1145 // Align-up to page boundary so that black allocations happen from next page
1146 // onwards. Also, it ensures that 'end' is aligned for card-table's
1147 // ClearCardRange().
1148 black_allocations_begin_ = bump_pointer_space_->AlignEnd(thread_running_gc_, gPageSize, heap_);
1149 DCHECK_ALIGNED_PARAM(black_allocations_begin_, gPageSize);
1150
1151 // Re-mark root set. Doesn't include thread-roots as they are already marked
1152 // above.
1153 ReMarkRoots(runtime);
1154 // Scan dirty objects.
1155 RecursiveMarkDirtyObjects(/*paused*/ true, accounting::CardTable::kCardDirty);
1156 {
1157 TimingLogger::ScopedTiming t2("SwapStacks", GetTimings());
1158 heap_->SwapStacks();
1159 live_stack_freeze_size_ = heap_->GetLiveStack()->Size();
1160 }
1161 }
1162 // TODO: For PreSweepingGcVerification(), find correct strategy to visit/walk
1163 // objects in bump-pointer space when we have a mark-bitmap to indicate live
1164 // objects. At the same time we also need to be able to visit black allocations,
1165 // even though they are not marked in the bitmap. Without both of these we fail
1166 // pre-sweeping verification, and we also leave windows open wherein a
1167 // VisitObjects/Walk on the space would either miss some objects or visit
1168 // unreachable ones. These windows occur while we are switching from the shared
1169 // mutator-lock to exclusive and vice versa, starting from here until the compaction pause.
1170 // heap_->PreSweepingGcVerification(this);
1171
1172 // Disallow new system weaks to prevent a race which occurs when someone adds
1173 // a new system weak before we sweep them. Since this new system weak may not
1174 // be marked, the GC may incorrectly sweep it. This also fixes a race where
1175 // interning may attempt to return a strong reference to a string that is
1176 // about to be swept.
1177 runtime->DisallowNewSystemWeaks();
1178 // Enable the reference processing slow path, needs to be done with mutators
1179 // paused since there is no lock in the GetReferent fast path.
1180 heap_->GetReferenceProcessor()->EnableSlowPath();
1181 marking_done_ = true;
1182 }
1183
1184 void MarkCompact::SweepSystemWeaks(Thread* self, Runtime* runtime, const bool paused) {
1185 TimingLogger::ScopedTiming t(paused ? "(Paused)SweepSystemWeaks" : "SweepSystemWeaks",
1186 GetTimings());
1187 ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
1188 runtime->SweepSystemWeaks(this);
1189 }
1190
1191 void MarkCompact::ProcessReferences(Thread* self) {
1192 WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
1193 GetHeap()->GetReferenceProcessor()->ProcessReferences(self, GetTimings());
1194 }
1195
1196 void MarkCompact::Sweep(bool swap_bitmaps) {
1197 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
1198 // Ensure that nobody inserted objects in the live stack after we swapped the
1199 // stacks.
1200 CHECK_GE(live_stack_freeze_size_, GetHeap()->GetLiveStack()->Size());
1201 {
1202 TimingLogger::ScopedTiming t2("MarkAllocStackAsLive", GetTimings());
1203 // Mark everything allocated since the last GC as live so that we can sweep
1204 // concurrently, knowing that new allocations won't be marked as live.
1205 accounting::ObjectStack* live_stack = heap_->GetLiveStack();
1206 heap_->MarkAllocStackAsLive(live_stack);
1207 live_stack->Reset();
1208 DCHECK(mark_stack_->IsEmpty());
1209 }
1210 for (const auto& space : GetHeap()->GetContinuousSpaces()) {
1211 if (space->IsContinuousMemMapAllocSpace() && space != bump_pointer_space_ &&
1212 !immune_spaces_.ContainsSpace(space)) {
1213 space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
1214 DCHECK(!alloc_space->IsZygoteSpace());
1215 TimingLogger::ScopedTiming split("SweepMallocSpace", GetTimings());
1216 RecordFree(alloc_space->Sweep(swap_bitmaps));
1217 }
1218 }
1219 SweepLargeObjects(swap_bitmaps);
1220 }
1221
1222 void MarkCompact::SweepLargeObjects(bool swap_bitmaps) {
1223 space::LargeObjectSpace* los = heap_->GetLargeObjectsSpace();
1224 if (los != nullptr) {
1225 TimingLogger::ScopedTiming split(__FUNCTION__, GetTimings());
1226 RecordFreeLOS(los->Sweep(swap_bitmaps));
1227 }
1228 }
1229
1230 void MarkCompact::ReclaimPhase() {
1231 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
1232 DCHECK(thread_running_gc_ == Thread::Current());
1233 Runtime* const runtime = Runtime::Current();
1234 // Process the references concurrently.
1235 ProcessReferences(thread_running_gc_);
1236 // TODO: Try to merge this system-weak sweeping with the one while updating
1237 // references during the compaction pause.
1238 SweepSystemWeaks(thread_running_gc_, runtime, /*paused*/ false);
1239 runtime->AllowNewSystemWeaks();
1240 // Clean up class loaders after system weaks are swept since that is how we know if class
1241 // unloading occurred.
1242 runtime->GetClassLinker()->CleanupClassLoaders();
1243 {
1244 WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_);
1245 // Reclaim unmarked objects.
1246 Sweep(false);
1247 // Swap the live and mark bitmaps for each space we modified. This is an
1248 // optimization that enables us to not clear live bits inside of the sweep. Only swaps unbound
1249 // bitmaps.
1250 SwapBitmaps();
1251 // Unbind the live and mark bitmaps.
1252 GetHeap()->UnBindBitmaps();
1253 }
1254 }
1255
1256 // We want to avoid checking for every reference if it's within the page or
1257 // not. This can be done if we know where in the page the holder object lies.
1258 // If it doesn't overlap either boundaries then we can skip the checks.
1259 template <bool kCheckBegin, bool kCheckEnd>
1260 class MarkCompact::RefsUpdateVisitor {
1261 public:
1262 explicit RefsUpdateVisitor(MarkCompact* collector,
1263 mirror::Object* obj,
1264 uint8_t* begin,
1265 uint8_t* end)
1266 : collector_(collector),
1267 moving_space_begin_(collector->black_dense_end_),
1268 moving_space_end_(collector->moving_space_end_),
1269 obj_(obj),
1270 begin_(begin),
1271 end_(end) {
1272 DCHECK(!kCheckBegin || begin != nullptr);
1273 DCHECK(!kCheckEnd || end != nullptr);
1274 }
1275
1276 void operator()([[maybe_unused]] mirror::Object* old,
1277 MemberOffset offset,
1278 [[maybe_unused]] bool is_static) const ALWAYS_INLINE
1279 REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES_SHARED(Locks::heap_bitmap_lock_) {
1280 bool update = true;
1281 if (kCheckBegin || kCheckEnd) {
1282 uint8_t* ref = reinterpret_cast<uint8_t*>(obj_) + offset.Int32Value();
1283 update = (!kCheckBegin || ref >= begin_) && (!kCheckEnd || ref < end_);
1284 }
1285 if (update) {
1286 collector_->UpdateRef(obj_, offset, moving_space_begin_, moving_space_end_);
1287 }
1288 }
1289
1290 // For object arrays we don't need to check boundaries here as it's done in
1291 // VisitReferences().
1292 // TODO: Optimize reference updating using SIMD instructions. Object arrays
1293 // are perfect as all references are tightly packed.
1294 void operator()([[maybe_unused]] mirror::Object* old,
1295 MemberOffset offset,
1296 [[maybe_unused]] bool is_static,
1297 [[maybe_unused]] bool is_obj_array) const ALWAYS_INLINE
1298 REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES_SHARED(Locks::heap_bitmap_lock_) {
1299 collector_->UpdateRef(obj_, offset, moving_space_begin_, moving_space_end_);
1300 }
1301
1302 void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
1303 ALWAYS_INLINE
1304 REQUIRES_SHARED(Locks::mutator_lock_) {
1305 if (!root->IsNull()) {
1306 VisitRoot(root);
1307 }
1308 }
1309
1310 void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
1311 ALWAYS_INLINE
1312 REQUIRES_SHARED(Locks::mutator_lock_) {
1313 collector_->UpdateRoot(root, moving_space_begin_, moving_space_end_);
1314 }
1315
1316 private:
1317 MarkCompact* const collector_;
1318 uint8_t* const moving_space_begin_;
1319 uint8_t* const moving_space_end_;
1320 mirror::Object* const obj_;
1321 uint8_t* const begin_;
1322 uint8_t* const end_;
1323 };
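// Illustrative sketch (not part of ART): how callers choose the kCheckBegin/kCheckEnd
// template arguments of RefsUpdateVisitor based on where the holder object lies
// relative to the page being worked on (see CompactPage/SlideBlackPage below).
// 'obj_begin', 'obj_size', 'page_begin' and 'page_end' are hypothetical locals
// used only for this sketch.
//
//   bool starts_before_page = obj_begin < page_begin;
//   bool may_end_after_page = obj_begin + obj_size > page_end;
//   // starts_before_page && may_end_after_page  -> RefsUpdateVisitor<true, true>
//   // starts_before_page only                   -> RefsUpdateVisitor<true, false>
//   // may_end_after_page only                   -> RefsUpdateVisitor<false, true>
//   // fully contained within the page           -> RefsUpdateVisitor<false, false>
//
// Objects fully contained within the page therefore skip both boundary checks.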
1324
1325 bool MarkCompact::IsValidObject(mirror::Object* obj) const {
1326 mirror::Class* klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
1327 if (!heap_->GetVerification()->IsValidHeapObjectAddress(klass)) {
1328 return false;
1329 }
1330 return heap_->GetVerification()->IsValidClassUnchecked<kWithFromSpaceBarrier>(
1331 obj->GetClass<kVerifyNone, kWithFromSpaceBarrier>());
1332 }
1333
1334 template <typename Callback>
1335 void MarkCompact::VerifyObject(mirror::Object* ref, Callback& callback) const {
1336 if (kIsDebugBuild) {
1337 mirror::Class* klass = ref->GetClass<kVerifyNone, kWithFromSpaceBarrier>();
1338 mirror::Class* pre_compact_klass = ref->GetClass<kVerifyNone, kWithoutReadBarrier>();
1339 mirror::Class* klass_klass = klass->GetClass<kVerifyNone, kWithFromSpaceBarrier>();
1340 mirror::Class* klass_klass_klass = klass_klass->GetClass<kVerifyNone, kWithFromSpaceBarrier>();
1341 if (HasAddress(pre_compact_klass) &&
1342 reinterpret_cast<uint8_t*>(pre_compact_klass) < black_allocations_begin_) {
1343 CHECK(moving_space_bitmap_->Test(pre_compact_klass))
1344 << "ref=" << ref
1345 << " post_compact_end=" << static_cast<void*>(post_compact_end_)
1346 << " pre_compact_klass=" << pre_compact_klass
1347 << " black_allocations_begin=" << static_cast<void*>(black_allocations_begin_);
1348 CHECK(live_words_bitmap_->Test(pre_compact_klass));
1349 }
1350 if (!IsValidObject(ref)) {
1351 std::ostringstream oss;
1352 oss << "Invalid object: "
1353 << "ref=" << ref
1354 << " klass=" << klass
1355 << " klass_klass=" << klass_klass
1356 << " klass_klass_klass=" << klass_klass_klass
1357 << " pre_compact_klass=" << pre_compact_klass
1358 << " from_space_begin=" << static_cast<void*>(from_space_begin_)
1359 << " pre_compact_begin=" << static_cast<void*>(bump_pointer_space_->Begin())
1360 << " post_compact_end=" << static_cast<void*>(post_compact_end_)
1361 << " black_allocations_begin=" << static_cast<void*>(black_allocations_begin_);
1362
1363 // Call callback before dumping larger data like RAM and space dumps.
1364 callback(oss);
1365
1366 oss << " \nobject="
1367 << heap_->GetVerification()->DumpRAMAroundAddress(reinterpret_cast<uintptr_t>(ref), 128)
1368 << " \nklass(from)="
1369 << heap_->GetVerification()->DumpRAMAroundAddress(reinterpret_cast<uintptr_t>(klass), 128)
1370 << "spaces:\n";
1371 heap_->DumpSpaces(oss);
1372 LOG(FATAL) << oss.str();
1373 }
1374 }
1375 }
1376
1377 void MarkCompact::CompactPage(mirror::Object* obj,
1378 uint32_t offset,
1379 uint8_t* addr,
1380 bool needs_memset_zero) {
1381 DCHECK(moving_space_bitmap_->Test(obj)
1382 && live_words_bitmap_->Test(obj));
1383 DCHECK(live_words_bitmap_->Test(offset)) << "obj=" << obj
1384 << " offset=" << offset
1385 << " addr=" << static_cast<void*>(addr)
1386 << " black_allocs_begin="
1387 << static_cast<void*>(black_allocations_begin_)
1388 << " post_compact_addr="
1389 << static_cast<void*>(post_compact_end_);
1390 uint8_t* const start_addr = addr;
1391 // Number of distinct live-strides encountered on this page.
1392 size_t stride_count = 0;
1393 uint8_t* last_stride = addr;
1394 uint32_t last_stride_begin = 0;
1395 auto verify_obj_callback = [&] (std::ostream& os) {
1396 os << " stride_count=" << stride_count
1397 << " last_stride=" << static_cast<void*>(last_stride)
1398 << " offset=" << offset
1399 << " start_addr=" << static_cast<void*>(start_addr);
1400 };
1401 obj = GetFromSpaceAddr(obj);
1402 live_words_bitmap_->VisitLiveStrides(
1403 offset,
1404 black_allocations_begin_,
1405 gPageSize,
1406 [&addr, &last_stride, &stride_count, &last_stride_begin, verify_obj_callback, this](
1407 uint32_t stride_begin, size_t stride_size, [[maybe_unused]] bool is_last)
1408 REQUIRES_SHARED(Locks::mutator_lock_) {
1409 const size_t stride_in_bytes = stride_size * kAlignment;
1410 DCHECK_LE(stride_in_bytes, gPageSize);
1411 last_stride_begin = stride_begin;
1412 DCHECK(IsAligned<kAlignment>(addr));
1413 memcpy(addr, from_space_begin_ + stride_begin * kAlignment, stride_in_bytes);
1414 if (kIsDebugBuild) {
1415 uint8_t* space_begin = bump_pointer_space_->Begin();
1416 // We can interpret the first word of the stride as an
1417 // obj only from the second stride onwards, as the first
1418 // stride's first-object may have started on a previous
1419 // page. The only exception is the first page of the
1420 // moving space.
1421 if (stride_count > 0 || stride_begin * kAlignment < gPageSize) {
1422 mirror::Object* o =
1423 reinterpret_cast<mirror::Object*>(space_begin + stride_begin * kAlignment);
1424 CHECK(live_words_bitmap_->Test(o)) << "ref=" << o;
1425 CHECK(moving_space_bitmap_->Test(o))
1426 << "ref=" << o << " bitmap: " << moving_space_bitmap_->DumpMemAround(o);
1427 VerifyObject(reinterpret_cast<mirror::Object*>(addr), verify_obj_callback);
1428 }
1429 }
1430 last_stride = addr;
1431 addr += stride_in_bytes;
1432 stride_count++;
1433 });
1434 DCHECK_LT(last_stride, start_addr + gPageSize);
1435 DCHECK_GT(stride_count, 0u);
1436 size_t obj_size = 0;
1437 uint32_t offset_within_obj = offset * kAlignment
1438 - (reinterpret_cast<uint8_t*>(obj) - from_space_begin_);
1439 // First object
1440 if (offset_within_obj > 0) {
1441 mirror::Object* to_ref = reinterpret_cast<mirror::Object*>(start_addr - offset_within_obj);
1442 if (stride_count > 1) {
1443 RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/false> visitor(this,
1444 to_ref,
1445 start_addr,
1446 nullptr);
1447 obj_size = obj->VisitRefsForCompaction</*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(
1448 visitor, MemberOffset(offset_within_obj), MemberOffset(-1));
1449 } else {
1450 RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/true> visitor(this,
1451 to_ref,
1452 start_addr,
1453 start_addr + gPageSize);
1454 obj_size = obj->VisitRefsForCompaction</*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(
1455 visitor, MemberOffset(offset_within_obj), MemberOffset(offset_within_obj
1456 + gPageSize));
1457 }
1458 obj_size = RoundUp(obj_size, kAlignment);
1459 DCHECK_GT(obj_size, offset_within_obj)
1460 << "obj:" << obj << " class:" << obj->GetClass<kDefaultVerifyFlags, kWithFromSpaceBarrier>()
1461 << " to_addr:" << to_ref
1462 << " black-allocation-begin:" << reinterpret_cast<void*>(black_allocations_begin_)
1463 << " post-compact-end:" << reinterpret_cast<void*>(post_compact_end_)
1464 << " offset:" << offset * kAlignment << " class-after-obj-iter:"
1465 << (class_after_obj_iter_ != class_after_obj_map_.rend() ?
1466 class_after_obj_iter_->first.AsMirrorPtr() :
1467 nullptr)
1468 << " last-reclaimed-page:" << reinterpret_cast<void*>(last_reclaimed_page_)
1469 << " last-checked-reclaim-page-idx:" << last_checked_reclaim_page_idx_
1470 << " offset-of-last-idx:"
1471 << pre_compact_offset_moving_space_[last_checked_reclaim_page_idx_] * kAlignment
1472 << " first-obj-of-last-idx:"
1473 << first_objs_moving_space_[last_checked_reclaim_page_idx_].AsMirrorPtr();
1474
1475 obj_size -= offset_within_obj;
1476 // If there is only one stride, then adjust last_stride_begin to the
1477 // end of the first object.
1478 if (stride_count == 1) {
1479 last_stride_begin += obj_size / kAlignment;
1480 }
1481 }
1482
1483 // Except for the last page being compacted, the pages will have addr ==
1484 // start_addr + gPageSize.
1485 uint8_t* const end_addr = addr;
1486 addr = start_addr;
1487 size_t bytes_done = obj_size;
1488 // All strides except the last one can be updated without any boundary
1489 // checks.
1490 DCHECK_LE(addr, last_stride);
1491 size_t bytes_to_visit = last_stride - addr;
1492 DCHECK_LE(bytes_to_visit, gPageSize);
1493 while (bytes_to_visit > bytes_done) {
1494 mirror::Object* ref = reinterpret_cast<mirror::Object*>(addr + bytes_done);
1495 VerifyObject(ref, verify_obj_callback);
1496 RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false>
1497 visitor(this, ref, nullptr, nullptr);
1498 obj_size = ref->VisitRefsForCompaction(visitor, MemberOffset(0), MemberOffset(-1));
1499 obj_size = RoundUp(obj_size, kAlignment);
1500 bytes_done += obj_size;
1501 }
1502 // The last stride may have multiple objects in it and we don't know where the
1503 // last object, which crosses the page boundary, starts. Therefore, check the
1504 // page-end in all of these objects. Also, we need to call
1505 // VisitRefsForCompaction() with the from-space object as we fetch the object size,
1506 // which in the case of a klass requires 'class_size_'.
1507 uint8_t* from_addr = from_space_begin_ + last_stride_begin * kAlignment;
1508 bytes_to_visit = end_addr - addr;
1509 DCHECK_LE(bytes_to_visit, gPageSize);
1510 while (bytes_to_visit > bytes_done) {
1511 mirror::Object* ref = reinterpret_cast<mirror::Object*>(addr + bytes_done);
1512 obj = reinterpret_cast<mirror::Object*>(from_addr);
1513 VerifyObject(ref, verify_obj_callback);
1514 RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/true>
1515 visitor(this, ref, nullptr, start_addr + gPageSize);
1516 obj_size = obj->VisitRefsForCompaction(visitor,
1517 MemberOffset(0),
1518 MemberOffset(end_addr - (addr + bytes_done)));
1519 obj_size = RoundUp(obj_size, kAlignment);
1520 DCHECK_GT(obj_size, 0u)
1521 << "from_addr:" << obj
1522 << " from-space-class:" << obj->GetClass<kDefaultVerifyFlags, kWithFromSpaceBarrier>()
1523 << " to_addr:" << ref
1524 << " black-allocation-begin:" << reinterpret_cast<void*>(black_allocations_begin_)
1525 << " post-compact-end:" << reinterpret_cast<void*>(post_compact_end_)
1526 << " offset:" << offset * kAlignment << " bytes_done:" << bytes_done
1527 << " class-after-obj-iter:"
1528 << (class_after_obj_iter_ != class_after_obj_map_.rend() ?
1529 class_after_obj_iter_->first.AsMirrorPtr() :
1530 nullptr)
1531 << " last-reclaimed-page:" << reinterpret_cast<void*>(last_reclaimed_page_)
1532 << " last-checked-reclaim-page-idx:" << last_checked_reclaim_page_idx_
1533 << " offset-of-last-idx:"
1534 << pre_compact_offset_moving_space_[last_checked_reclaim_page_idx_] * kAlignment
1535 << " first-obj-of-last-idx:"
1536 << first_objs_moving_space_[last_checked_reclaim_page_idx_].AsMirrorPtr();
1537
1538 from_addr += obj_size;
1539 bytes_done += obj_size;
1540 }
1541 // The last page that we compact may have some bytes left untouched at the
1542 // end; we should zero them as the kernel copies at page granularity.
1543 if (needs_memset_zero && UNLIKELY(bytes_done < gPageSize)) {
1544 std::memset(addr + bytes_done, 0x0, gPageSize - bytes_done);
1545 }
1546 }
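// Illustrative sketch (not part of ART): conceptually, the stride-copy loop in
// CompactPage above reduces to the following, assuming a hypothetical 'strides'
// list of contiguous live-word runs derived from the live-words bitmap:
//
//   uint8_t* dst = to_space_page;
//   for (const Stride& s : strides) {
//     // Dead words between strides are simply skipped, which is what compacts the page.
//     memcpy(dst, from_space_begin_ + s.begin_offset, s.size_in_bytes);
//     dst += s.size_in_bytes;
//   }
//   // References inside the copied objects are then updated in place using
//   // RefsUpdateVisitor, with boundary checks only for the first and last objects.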
1547
1548 // We store the starting point (pre_compact_page - first_obj) and first-chunk's
1549 // size. If more TLAB(s) started in this page, then those chunks are identified
1550 // using the mark bitmap. All this info is prepared in UpdateMovingSpaceBlackAllocations().
1551 // If we find a set bit in the bitmap, then we copy the remaining page and then
1552 // use the bitmap to visit each object for updating references.
1553 void MarkCompact::SlideBlackPage(mirror::Object* first_obj,
1554 mirror::Object* next_page_first_obj,
1555 uint32_t first_chunk_size,
1556 uint8_t* const pre_compact_page,
1557 uint8_t* dest,
1558 bool needs_memset_zero) {
1559 DCHECK(IsAlignedParam(pre_compact_page, gPageSize));
1560 size_t bytes_copied;
1561 uint8_t* src_addr = reinterpret_cast<uint8_t*>(GetFromSpaceAddr(first_obj));
1562 uint8_t* pre_compact_addr = reinterpret_cast<uint8_t*>(first_obj);
1563 uint8_t* const pre_compact_page_end = pre_compact_page + gPageSize;
1564 uint8_t* const dest_page_end = dest + gPageSize;
1565
1566 auto verify_obj_callback = [&] (std::ostream& os) {
1567 os << " first_obj=" << first_obj
1568 << " next_page_first_obj=" << next_page_first_obj
1569 << " first_chunk_sie=" << first_chunk_size
1570 << " dest=" << static_cast<void*>(dest)
1571 << " pre_compact_page="
1572 << static_cast<void* const>(pre_compact_page);
1573 };
1574 // If there is an empty portion at the beginning of the page, zero it.
1575 if (pre_compact_addr > pre_compact_page) {
1576 bytes_copied = pre_compact_addr - pre_compact_page;
1577 DCHECK_LT(bytes_copied, gPageSize);
1578 if (needs_memset_zero) {
1579 std::memset(dest, 0x0, bytes_copied);
1580 }
1581 dest += bytes_copied;
1582 } else {
1583 bytes_copied = 0;
1584 size_t offset = pre_compact_page - pre_compact_addr;
1585 pre_compact_addr = pre_compact_page;
1586 src_addr += offset;
1587 DCHECK(IsAlignedParam(src_addr, gPageSize));
1588 }
1589 // Copy the first chunk of live words
1590 std::memcpy(dest, src_addr, first_chunk_size);
1591 // Update references in the first chunk. Use object size to find next object.
1592 {
1593 size_t bytes_to_visit = first_chunk_size;
1594 size_t obj_size;
1595 // The first object started in some previous page. So we need to check the
1596 // beginning.
1597 DCHECK_LE(reinterpret_cast<uint8_t*>(first_obj), pre_compact_addr);
1598 size_t offset = pre_compact_addr - reinterpret_cast<uint8_t*>(first_obj);
1599 if (bytes_copied == 0 && offset > 0) {
1600 mirror::Object* to_obj = reinterpret_cast<mirror::Object*>(dest - offset);
1601 mirror::Object* from_obj = reinterpret_cast<mirror::Object*>(src_addr - offset);
1602 // If the next page's first-obj is within this page, or is nullptr, then we don't
1603 // need to check the end boundary.
1604 if (next_page_first_obj == nullptr
1605 || (first_obj != next_page_first_obj
1606 && reinterpret_cast<uint8_t*>(next_page_first_obj) <= pre_compact_page_end)) {
1607 RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/false> visitor(this,
1608 to_obj,
1609 dest,
1610 nullptr);
1611 obj_size = from_obj->VisitRefsForCompaction<
1612 /*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(visitor,
1613 MemberOffset(offset),
1614 MemberOffset(-1));
1615 } else {
1616 RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/true> visitor(this,
1617 to_obj,
1618 dest,
1619 dest_page_end);
1620 obj_size = from_obj->VisitRefsForCompaction<
1621 /*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(visitor,
1622 MemberOffset(offset),
1623 MemberOffset(offset
1624 + gPageSize));
1625 if (first_obj == next_page_first_obj) {
1626 // First object is the only object on this page. So there's nothing else left to do.
1627 return;
1628 }
1629 }
1630 obj_size = RoundUp(obj_size, kAlignment);
1631 obj_size -= offset;
1632 dest += obj_size;
1633 bytes_to_visit -= obj_size;
1634 }
1635 bytes_copied += first_chunk_size;
1636 // If the last object in this page is next_page_first_obj, then we need to check the end boundary.
1637 bool check_last_obj = false;
1638 if (next_page_first_obj != nullptr
1639 && reinterpret_cast<uint8_t*>(next_page_first_obj) < pre_compact_page_end
1640 && bytes_copied == gPageSize) {
1641 size_t diff = pre_compact_page_end - reinterpret_cast<uint8_t*>(next_page_first_obj);
1642 DCHECK_LE(diff, gPageSize);
1643 DCHECK_LE(diff, bytes_to_visit);
1644 bytes_to_visit -= diff;
1645 check_last_obj = true;
1646 }
1647 while (bytes_to_visit > 0) {
1648 mirror::Object* dest_obj = reinterpret_cast<mirror::Object*>(dest);
1649 VerifyObject(dest_obj, verify_obj_callback);
1650 RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false> visitor(this,
1651 dest_obj,
1652 nullptr,
1653 nullptr);
1654 obj_size = dest_obj->VisitRefsForCompaction(visitor, MemberOffset(0), MemberOffset(-1));
1655 obj_size = RoundUp(obj_size, kAlignment);
1656 bytes_to_visit -= obj_size;
1657 dest += obj_size;
1658 }
1659 DCHECK_EQ(bytes_to_visit, 0u);
1660 if (check_last_obj) {
1661 mirror::Object* dest_obj = reinterpret_cast<mirror::Object*>(dest);
1662 VerifyObject(dest_obj, verify_obj_callback);
1663 RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/true> visitor(this,
1664 dest_obj,
1665 nullptr,
1666 dest_page_end);
1667 mirror::Object* obj = GetFromSpaceAddr(next_page_first_obj);
1668 obj->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor,
1669 MemberOffset(0),
1670 MemberOffset(dest_page_end - dest));
1671 return;
1672 }
1673 }
1674
1675 // Probably a TLAB finished on this page and/or a new TLAB started as well.
1676 if (bytes_copied < gPageSize) {
1677 src_addr += first_chunk_size;
1678 pre_compact_addr += first_chunk_size;
1679 // Use the mark-bitmap to identify where objects are. First call
1680 // VisitMarkedRange to find only the first marked bit. If no object is found,
1681 // zero the remaining bytes (if required) and return. Otherwise, copy the rest
1682 // of the page and then call VisitMarkedRange for all marked bits *after* the
1683 // one found in this invocation, this time to update references.
1684 uintptr_t start_visit = reinterpret_cast<uintptr_t>(pre_compact_addr);
1685 uintptr_t page_end = reinterpret_cast<uintptr_t>(pre_compact_page_end);
1686 mirror::Object* found_obj = nullptr;
1687 moving_space_bitmap_->VisitMarkedRange</*kVisitOnce*/true>(start_visit,
1688 page_end,
1689 [&found_obj](mirror::Object* obj) {
1690 found_obj = obj;
1691 });
1692 size_t remaining_bytes = gPageSize - bytes_copied;
1693 if (found_obj == nullptr) {
1694 if (needs_memset_zero) {
1695 // No more black objects in this page. Zero the remaining bytes and return.
1696 std::memset(dest, 0x0, remaining_bytes);
1697 }
1698 return;
1699 }
1700 // Copy everything in this page, which includes any zeroed regions
1701 // in-between.
1702 std::memcpy(dest, src_addr, remaining_bytes);
1703 DCHECK_LT(reinterpret_cast<uintptr_t>(found_obj), page_end);
1704 moving_space_bitmap_->VisitMarkedRange(
1705 reinterpret_cast<uintptr_t>(found_obj) + mirror::kObjectHeaderSize,
1706 page_end,
1707 [&found_obj, pre_compact_addr, dest, this, verify_obj_callback] (mirror::Object* obj)
1708 REQUIRES_SHARED(Locks::mutator_lock_) {
1709 ptrdiff_t diff = reinterpret_cast<uint8_t*>(found_obj) - pre_compact_addr;
1710 mirror::Object* ref = reinterpret_cast<mirror::Object*>(dest + diff);
1711 VerifyObject(ref, verify_obj_callback);
1712 RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false>
1713 visitor(this, ref, nullptr, nullptr);
1714 ref->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor,
1715 MemberOffset(0),
1716 MemberOffset(-1));
1717 // Remember for next round.
1718 found_obj = obj;
1719 });
1720 // found_obj may have been updated in VisitMarkedRange. Visit the last found
1721 // object.
1722 DCHECK_GT(reinterpret_cast<uint8_t*>(found_obj), pre_compact_addr);
1723 DCHECK_LT(reinterpret_cast<uintptr_t>(found_obj), page_end);
1724 ptrdiff_t diff = reinterpret_cast<uint8_t*>(found_obj) - pre_compact_addr;
1725 mirror::Object* dest_obj = reinterpret_cast<mirror::Object*>(dest + diff);
1726 VerifyObject(dest_obj, verify_obj_callback);
1727 RefsUpdateVisitor</*kCheckBegin*/ false, /*kCheckEnd*/ true> visitor(
1728 this, dest_obj, nullptr, dest_page_end);
1729 // The last object could overlap with the next page. And if it happens to be a
1730 // class, then we may access something (like static-fields' offsets) which
1731 // is on the next page. Therefore, use the from-space reference.
1732 mirror::Object* obj = GetFromSpaceAddr(found_obj);
1733 obj->VisitRefsForCompaction</*kFetchObjSize*/ false>(
1734 visitor, MemberOffset(0), MemberOffset(page_end - reinterpret_cast<uintptr_t>(found_obj)));
1735 }
1736 }
1737
1738 template <uint32_t kYieldMax = 5, uint64_t kSleepUs = 10>
1739 static void BackOff(uint32_t i) {
1740 // TODO: Consider adding x86 PAUSE and/or ARM YIELD here.
1741 if (i <= kYieldMax) {
1742 sched_yield();
1743 } else {
1744 // nanosleep is not in the async-signal-safe list, but bionic implements it
1745 // with a pure system call, so it should be fine.
1746 NanoSleep(kSleepUs * 1000 * (i - kYieldMax));
1747 }
1748 }
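// Illustrative usage sketch (not part of ART): callers retry an operation in a loop,
// passing a monotonically increasing attempt counter, so the first few attempts merely
// yield while later ones sleep for progressively longer intervals:
//
//   uint32_t backoff_count = 0;
//   while (!TryOperation()) {      // hypothetical operation that may need retrying
//     BackOff(backoff_count++);
//   }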
1749
1750 size_t MarkCompact::ZeropageIoctl(void* addr,
1751 size_t length,
1752 bool tolerate_eexist,
1753 bool tolerate_enoent) {
1754 int32_t backoff_count = -1;
1755 int32_t max_backoff = 10; // max native priority.
1756 struct uffdio_zeropage uffd_zeropage;
1757 DCHECK(IsAlignedParam(addr, gPageSize));
1758 uffd_zeropage.range.start = reinterpret_cast<uintptr_t>(addr);
1759 uffd_zeropage.range.len = length;
1760 uffd_zeropage.mode = gUffdSupportsMmapTrylock ? UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK : 0;
1761 while (true) {
1762 uffd_zeropage.zeropage = 0;
1763 int ret = ioctl(uffd_, UFFDIO_ZEROPAGE, &uffd_zeropage);
1764 if (ret == 0) {
1765 DCHECK_EQ(uffd_zeropage.zeropage, static_cast<ssize_t>(length));
1766 return length;
1767 } else if (errno == EAGAIN) {
1768 if (uffd_zeropage.zeropage > 0) {
1769 // Contention was observed after acquiring mmap_lock. But the first page
1770 // is already done, which is what we care about.
1771 DCHECK(IsAlignedParam(uffd_zeropage.zeropage, gPageSize));
1772 DCHECK_GE(uffd_zeropage.zeropage, static_cast<ssize_t>(gPageSize));
1773 return uffd_zeropage.zeropage;
1774 } else if (uffd_zeropage.zeropage < 0) {
1775 // mmap_read_trylock() failed due to contention. Back-off and retry.
1776 DCHECK_EQ(uffd_zeropage.zeropage, -EAGAIN);
1777 if (backoff_count == -1) {
1778 int prio = Thread::Current()->GetNativePriority();
1779 DCHECK(prio > 0 && prio <= 10) << prio;
1780 max_backoff -= prio;
1781 backoff_count = 0;
1782 }
1783 if (backoff_count < max_backoff) {
1784 // Using 3 to align 'normal' priority threads with sleep.
1785 BackOff</*kYieldMax=*/3, /*kSleepUs=*/1000>(backoff_count++);
1786 } else {
1787 uffd_zeropage.mode = 0;
1788 }
1789 }
1790 } else if (tolerate_eexist && errno == EEXIST) {
1791 // Ioctl returns the number of bytes it mapped. The page on which EEXIST occurred
1792 // wouldn't be included in it.
1793 return uffd_zeropage.zeropage > 0 ? uffd_zeropage.zeropage + gPageSize : gPageSize;
1794 } else {
1795 CHECK(tolerate_enoent && errno == ENOENT)
1796 << "ioctl_userfaultfd: zeropage failed: " << strerror(errno) << ". addr:" << addr;
1797 return 0;
1798 }
1799 }
1800 }
1801
1802 size_t MarkCompact::CopyIoctl(
1803 void* dst, void* buffer, size_t length, bool return_on_contention, bool tolerate_enoent) {
1804 int32_t backoff_count = -1;
1805 int32_t max_backoff = 10; // max native priority.
1806 struct uffdio_copy uffd_copy;
1807 uffd_copy.mode = gUffdSupportsMmapTrylock ? UFFDIO_COPY_MODE_MMAP_TRYLOCK : 0;
1808 uffd_copy.src = reinterpret_cast<uintptr_t>(buffer);
1809 uffd_copy.dst = reinterpret_cast<uintptr_t>(dst);
1810 uffd_copy.len = length;
1811 uffd_copy.copy = 0;
1812 while (true) {
1813 int ret = ioctl(uffd_, UFFDIO_COPY, &uffd_copy);
1814 if (ret == 0) {
1815 DCHECK_EQ(uffd_copy.copy, static_cast<ssize_t>(length));
1816 break;
1817 } else if (errno == EAGAIN) {
1818 // Contention observed.
1819 DCHECK_NE(uffd_copy.copy, 0);
1820 if (uffd_copy.copy > 0) {
1821 // Contention was observed after acquiring mmap_lock.
1822 DCHECK(IsAlignedParam(uffd_copy.copy, gPageSize));
1823 DCHECK_GE(uffd_copy.copy, static_cast<ssize_t>(gPageSize));
1824 break;
1825 } else {
1826 // mmap_read_trylock() failed due to contention.
1827 DCHECK_EQ(uffd_copy.copy, -EAGAIN);
1828 uffd_copy.copy = 0;
1829 if (return_on_contention) {
1830 break;
1831 }
1832 }
1833 if (backoff_count == -1) {
1834 int prio = Thread::Current()->GetNativePriority();
1835 DCHECK(prio > 0 && prio <= 10) << prio;
1836 max_backoff -= prio;
1837 backoff_count = 0;
1838 }
1839 if (backoff_count < max_backoff) {
1840 // Using 3 to align 'normal' priority threads with sleep.
1841 BackOff</*kYieldMax=*/3, /*kSleepUs=*/1000>(backoff_count++);
1842 } else {
1843 uffd_copy.mode = 0;
1844 }
1845 } else if (errno == EEXIST) {
1846 DCHECK_NE(uffd_copy.copy, 0);
1847 if (uffd_copy.copy < 0) {
1848 uffd_copy.copy = 0;
1849 }
1850 // Ioctl returns the number of bytes it mapped. The page on which EEXIST occurred
1851 // wouldn't be included in it.
1852 uffd_copy.copy += gPageSize;
1853 break;
1854 } else {
1855 CHECK(tolerate_enoent && errno == ENOENT)
1856 << "ioctl_userfaultfd: copy failed: " << strerror(errno) << ". src:" << buffer
1857 << " dst:" << dst;
1858 return uffd_copy.copy > 0 ? uffd_copy.copy : 0;
1859 }
1860 }
1861 return uffd_copy.copy;
1862 }
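// Illustrative sketch (not part of ART): without the trylock/back-off handling, mapping
// a single page via userfaultfd reduces to a plain UFFDIO_COPY ioctl, which is what the
// loop above performs in the uncontended case:
//
//   struct uffdio_copy copy;
//   copy.src = reinterpret_cast<uintptr_t>(src_buffer);   // page-sized staging buffer
//   copy.dst = reinterpret_cast<uintptr_t>(dst_page);     // page registered with uffd_
//   copy.len = gPageSize;
//   copy.mode = 0;
//   copy.copy = 0;
//   if (ioctl(uffd_, UFFDIO_COPY, &copy) == 0) {
//     // copy.copy == gPageSize: the destination page is now populated and mapped.
//   }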
1863
1864 template <int kMode, typename CompactionFn>
1865 bool MarkCompact::DoPageCompactionWithStateChange(size_t page_idx,
1866 uint8_t* to_space_page,
1867 uint8_t* page,
1868 bool map_immediately,
1869 CompactionFn func) {
1870 uint32_t expected_state = static_cast<uint8_t>(PageState::kUnprocessed);
1871 uint32_t desired_state = static_cast<uint8_t>(map_immediately ? PageState::kProcessingAndMapping :
1872 PageState::kProcessing);
1873 // In the concurrent case (kMode != kFallbackMode) we need to ensure that the update
1874 // to moving_spaces_status_[page_idx] is released before the contents of the page are
1875 // made accessible to other threads.
1876 //
1877 // We need acquire ordering here to ensure that when the CAS fails, another thread
1878 // has completed processing the page, which is guaranteed by the release below.
1879 if (kMode == kFallbackMode || moving_pages_status_[page_idx].compare_exchange_strong(
1880 expected_state, desired_state, std::memory_order_acquire)) {
1881 func();
1882 if (kMode == kCopyMode) {
1883 if (map_immediately) {
1884 CopyIoctl(to_space_page,
1885 page,
1886 gPageSize,
1887 /*return_on_contention=*/false,
1888 /*tolerate_enoent=*/false);
1889 // Store is sufficient as no other thread could modify the status at this
1890 // point. Relaxed order is sufficient as the ioctl will act as a fence.
1891 moving_pages_status_[page_idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
1892 std::memory_order_relaxed);
1893 } else {
1894 // Add the src page's index in the status word.
1895 DCHECK(from_space_map_.HasAddress(page));
1896 DCHECK_LE(static_cast<size_t>(page - from_space_begin_),
1897 std::numeric_limits<uint32_t>::max());
1898 uint32_t store_val = page - from_space_begin_;
1899 DCHECK_EQ(store_val & kPageStateMask, 0u);
1900 store_val |= static_cast<uint8_t>(PageState::kProcessed);
1901 // Store is sufficient as no other thread would modify the status at this point.
1902 moving_pages_status_[page_idx].store(store_val, std::memory_order_release);
1903 }
1904 }
1905 return true;
1906 } else {
1907 // Only GC thread could have set the state to Processed.
1908 DCHECK_NE(expected_state, static_cast<uint8_t>(PageState::kProcessed));
1909 return false;
1910 }
1911 }
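// Illustrative sketch (not part of ART): the page-state transitions driven by
// DoPageCompactionWithStateChange() above in the concurrent (kCopyMode) case:
//
//   kUnprocessed --CAS--> kProcessingAndMapping --func()+CopyIoctl--> kProcessedAndMapped
//   kUnprocessed --CAS--> kProcessing --func()--> kProcessed (from-space offset stored in
//                                                 the status word; mapped later by
//                                                 MapMovingSpacePages())
//
// A failed CAS means another thread has already claimed the page, so the caller
// simply moves on.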
1912
1913 bool MarkCompact::FreeFromSpacePages(size_t cur_page_idx, int mode, size_t end_idx_for_mapping) {
1914 // Thanks to sliding compaction, bump-pointer allocations, and reverse
1915 // compaction (see CompactMovingSpace) the logic here is pretty simple: find
1916 // the to-space page up to which compaction has finished; all the from-space
1917 // pages from that point onwards can be freed. There are some corner
1918 // cases to be taken care of, which are described below.
1919 size_t idx = last_checked_reclaim_page_idx_;
1920 // Find the to-space page up to which the corresponding from-space pages can be
1921 // freed.
1922 for (; idx > cur_page_idx; idx--) {
1923 PageState state = GetMovingPageState(idx - 1);
1924 if (state == PageState::kMutatorProcessing) {
1925 // Some mutator is working on the page.
1926 break;
1927 }
1928 DCHECK(state >= PageState::kProcessed ||
1929 (state == PageState::kUnprocessed &&
1930 (mode == kFallbackMode || idx > moving_first_objs_count_)));
1931 }
1932 DCHECK_LE(idx, last_checked_reclaim_page_idx_);
1933 if (idx == last_checked_reclaim_page_idx_) {
1934 // Nothing to do.
1935 return false;
1936 }
1937
1938 uint8_t* reclaim_begin;
1939 uint8_t* idx_addr;
1940 // Calculate the first from-space page to be freed using 'idx'. If the
1941 // first-object of the idx'th to-space page started before the corresponding
1942 // from-space page, which is almost always the case in the compaction portion
1943 // of the moving-space, then it indicates that the subsequent pages that are
1944 // yet to be compacted will need the from-space pages. Therefore, find the page
1945 // (from the already compacted pages) whose first-object is different from
1946 // ours. All the from-space pages starting from that one are safe to be
1947 // removed. Please note that this iteration is not expected to be long in
1948 // normal cases as objects are smaller than the page size.
1949 if (idx >= moving_first_objs_count_) {
1950 // black-allocated portion of the moving-space
1951 idx_addr = black_allocations_begin_ + (idx - moving_first_objs_count_) * gPageSize;
1952 reclaim_begin = idx_addr;
1953 mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
1954 if (first_obj != nullptr && reinterpret_cast<uint8_t*>(first_obj) < reclaim_begin) {
1955 size_t idx_len = moving_first_objs_count_ + black_page_count_;
1956 for (size_t i = idx + 1; i < idx_len; i++) {
1957 mirror::Object* obj = first_objs_moving_space_[i].AsMirrorPtr();
1958 // A null first-object indicates that the corresponding to-space page is
1959 // not used yet. So we can compute its from-space page and use that.
1960 if (obj != first_obj) {
1961 reclaim_begin = obj != nullptr
1962 ? AlignUp(reinterpret_cast<uint8_t*>(obj), gPageSize)
1963 : (black_allocations_begin_ + (i - moving_first_objs_count_) * gPageSize);
1964 break;
1965 }
1966 }
1967 }
1968 } else {
1969 DCHECK_GE(pre_compact_offset_moving_space_[idx], 0u);
1970 idx_addr = moving_space_begin_ + idx * gPageSize;
1971 if (idx_addr >= black_dense_end_) {
1972 idx_addr = moving_space_begin_ + pre_compact_offset_moving_space_[idx] * kAlignment;
1973 }
1974 reclaim_begin = idx_addr;
1975 DCHECK_LE(reclaim_begin, black_allocations_begin_);
1976 mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
1977 if (first_obj != nullptr) {
1978 if (reinterpret_cast<uint8_t*>(first_obj) < reclaim_begin) {
1979 DCHECK_LT(idx, moving_first_objs_count_);
1980 mirror::Object* obj = first_obj;
1981 for (size_t i = idx + 1; i < moving_first_objs_count_; i++) {
1982 obj = first_objs_moving_space_[i].AsMirrorPtr();
1983 if (obj == nullptr) {
1984 reclaim_begin = moving_space_begin_ + i * gPageSize;
1985 break;
1986 } else if (first_obj != obj) {
1987 DCHECK_LT(first_obj, obj);
1988 DCHECK_LT(reclaim_begin, reinterpret_cast<uint8_t*>(obj));
1989 reclaim_begin = reinterpret_cast<uint8_t*>(obj);
1990 break;
1991 }
1992 }
1993 if (obj == first_obj) {
1994 reclaim_begin = black_allocations_begin_;
1995 }
1996 }
1997 }
1998 reclaim_begin = AlignUp(reclaim_begin, gPageSize);
1999 }
2000
2001 DCHECK_NE(reclaim_begin, nullptr);
2002 DCHECK_ALIGNED_PARAM(reclaim_begin, gPageSize);
2003 DCHECK_ALIGNED_PARAM(last_reclaimed_page_, gPageSize);
2004 // Check if the 'class_after_obj_map_' map allows pages to be freed.
2005 for (; class_after_obj_iter_ != class_after_obj_map_.rend(); class_after_obj_iter_++) {
2006 mirror::Object* klass = class_after_obj_iter_->first.AsMirrorPtr();
2007 mirror::Class* from_klass = static_cast<mirror::Class*>(GetFromSpaceAddr(klass));
2008 // Check with the class's end to ensure that, if required, the entire class survives.
2009 uint8_t* klass_end = reinterpret_cast<uint8_t*>(klass) + from_klass->SizeOf<kVerifyNone>();
2010 DCHECK_LE(klass_end, last_reclaimed_page_);
2011 if (reinterpret_cast<uint8_t*>(klass_end) >= reclaim_begin) {
2012 // Found a class which is in the reclaim range.
2013 if (reinterpret_cast<uint8_t*>(class_after_obj_iter_->second.AsMirrorPtr()) < idx_addr) {
2014 // Its lowest-address object is not compacted yet. Reclaim starting from
2015 // the end of this class.
2016 reclaim_begin = AlignUp(klass_end, gPageSize);
2017 } else {
2018 // Continue consuming pairs wherein the lowest address object has already
2019 // been compacted.
2020 continue;
2021 }
2022 }
2023 // All the remaining class (and thereby corresponding object) addresses are
2024 // lower than the reclaim range.
2025 break;
2026 }
2027 bool all_mapped = mode == kFallbackMode;
2028 ssize_t size = last_reclaimed_page_ - reclaim_begin;
2029 if (size > kMinFromSpaceMadviseSize) {
2030 // Map all the pages in the range.
2031 if (mode == kCopyMode && cur_page_idx < end_idx_for_mapping) {
2032 if (MapMovingSpacePages(cur_page_idx,
2033 end_idx_for_mapping,
2034 /*from_ioctl=*/false,
2035 /*return_on_contention=*/true,
2036 /*tolerate_enoent=*/false) == end_idx_for_mapping - cur_page_idx) {
2037 all_mapped = true;
2038 }
2039 } else {
2040 // This is for the black-allocation pages so that madvise is not missed.
2041 all_mapped = true;
2042 }
2043 // If not all pages are mapped, then take it as a hint that mmap_lock is
2044 // contended and hence don't madvise as that also needs the same lock.
2045 if (all_mapped) {
2046 // Retain a few pages for subsequent compactions.
2047 const ssize_t gBufferPages = 4 * gPageSize;
2048 DCHECK_LT(gBufferPages, kMinFromSpaceMadviseSize);
2049 size -= gBufferPages;
2050 uint8_t* addr = last_reclaimed_page_ - size;
2051 CHECK_EQ(madvise(addr + from_space_slide_diff_, size, MADV_DONTNEED), 0)
2052 << "madvise of from-space failed: " << strerror(errno);
2053 last_reclaimed_page_ = addr;
2054 cur_reclaimable_page_ = addr;
2055 }
2056 }
2057 last_reclaimable_page_ = std::min(reclaim_begin, last_reclaimable_page_);
2058 last_checked_reclaim_page_idx_ = idx;
2059 return all_mapped;
2060 }
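// Illustrative sketch (not part of ART): once a from-space range is no longer needed by
// pages that are yet to be compacted, FreeFromSpacePages() returns it to the kernel with
// MADV_DONTNEED. Simplified, ignoring the retained buffer pages:
//
//   uint8_t* from_space_addr = reclaim_begin + from_space_slide_diff_;
//   size_t len = last_reclaimed_page_ - reclaim_begin;
//   if (madvise(from_space_addr, len, MADV_DONTNEED) != 0) {
//     PLOG(FATAL) << "madvise of from-space failed";   // the real code uses CHECK_EQ
//   }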
2061
2062 template <int kMode>
2063 void MarkCompact::CompactMovingSpace(uint8_t* page) {
2064 // For every page we have a starting object, which may have started in some
2065 // preceding page, and an offset within that object from where we must start
2066 // copying.
2067 // Consult the live-words bitmap to copy all contiguously live words at a
2068 // time. These words may constitute multiple objects. To avoid the need for
2069 // consulting the mark-bitmap to find where the next live object starts, we
2070 // use the object-size returned by VisitRefsForCompaction.
2071 //
2072 // We do the compaction in the reverse direction so that the pages containing
2073 // TLABs and the latest allocations are processed first.
2074 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
2075 size_t page_status_arr_len = moving_first_objs_count_ + black_page_count_;
2076 size_t idx = page_status_arr_len;
2077 size_t black_dense_end_idx = (black_dense_end_ - moving_space_begin_) / gPageSize;
2078 uint8_t* to_space_end = moving_space_begin_ + page_status_arr_len * gPageSize;
2079 uint8_t* pre_compact_page = black_allocations_begin_ + (black_page_count_ * gPageSize);
2080
2081 DCHECK(IsAlignedParam(pre_compact_page, gPageSize));
2082
2083 // These variables are maintained by FreeFromSpacePages().
2084 last_reclaimed_page_ = pre_compact_page;
2085 last_reclaimable_page_ = last_reclaimed_page_;
2086 cur_reclaimable_page_ = last_reclaimed_page_;
2087 last_checked_reclaim_page_idx_ = idx;
2088 class_after_obj_iter_ = class_after_obj_map_.rbegin();
2089 // Allocated-black pages
2090 mirror::Object* next_page_first_obj = nullptr;
2091 while (idx > moving_first_objs_count_) {
2092 idx--;
2093 pre_compact_page -= gPageSize;
2094 to_space_end -= gPageSize;
2095 if (kMode == kFallbackMode) {
2096 page = to_space_end;
2097 }
2098 mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2099 uint32_t first_chunk_size = black_alloc_pages_first_chunk_size_[idx];
2100 if (first_obj != nullptr) {
2101 DoPageCompactionWithStateChange<kMode>(idx,
2102 to_space_end,
2103 page,
2104 /*map_immediately=*/true,
2105 [&]() REQUIRES_SHARED(Locks::mutator_lock_) {
2106 SlideBlackPage(first_obj,
2107 next_page_first_obj,
2108 first_chunk_size,
2109 pre_compact_page,
2110 page,
2111 kMode == kCopyMode);
2112 });
2113 // We are sliding here, so there is no point in attempting to madvise for every
2114 // page. Wait for enough pages to be done.
2115 if (idx % DivideByPageSize(kMinFromSpaceMadviseSize) == 0) {
2116 FreeFromSpacePages(idx, kMode, /*end_idx_for_mapping=*/0);
2117 }
2118 }
2119 next_page_first_obj = first_obj;
2120 }
2121 DCHECK_EQ(pre_compact_page, black_allocations_begin_);
2122 // Reserved page to be used if we can't find any reclaimable page for processing.
2123 uint8_t* reserve_page = page;
2124 size_t end_idx_for_mapping = idx;
2125 while (idx > black_dense_end_idx) {
2126 idx--;
2127 to_space_end -= gPageSize;
2128 if (kMode == kFallbackMode) {
2129 page = to_space_end;
2130 } else {
2131 DCHECK_EQ(kMode, kCopyMode);
2132 if (cur_reclaimable_page_ > last_reclaimable_page_) {
2133 cur_reclaimable_page_ -= gPageSize;
2134 page = cur_reclaimable_page_ + from_space_slide_diff_;
2135 } else {
2136 page = reserve_page;
2137 }
2138 }
2139 mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2140 bool success = DoPageCompactionWithStateChange<kMode>(
2141 idx,
2142 to_space_end,
2143 page,
2144 /*map_immediately=*/page == reserve_page,
2145 [&]() REQUIRES_SHARED(Locks::mutator_lock_) {
2146 CompactPage(first_obj, pre_compact_offset_moving_space_[idx], page, kMode == kCopyMode);
2147 });
2148 if (kMode == kCopyMode && (!success || page == reserve_page) && end_idx_for_mapping - idx > 1) {
2149 // Map the pages at the following addresses, as they can't be mapped together with
2150 // the yet-to-be-compacted pages: their src-side pages won't be contiguous.
2151 MapMovingSpacePages(idx + 1,
2152 end_idx_for_mapping,
2153 /*from_fault=*/false,
2154 /*return_on_contention=*/true,
2155 /*tolerate_enoent=*/false);
2156 }
2157 if (FreeFromSpacePages(idx, kMode, end_idx_for_mapping)) {
2158 end_idx_for_mapping = idx;
2159 }
2160 }
2161 while (idx > 0) {
2162 idx--;
2163 to_space_end -= gPageSize;
2164 mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2165 if (first_obj != nullptr) {
2166 DoPageCompactionWithStateChange<kMode>(
2167 idx,
2168 to_space_end,
2169 to_space_end + from_space_slide_diff_,
2170 /*map_immediately=*/false,
2171 [&]() REQUIRES_SHARED(Locks::mutator_lock_) {
2172 UpdateNonMovingPage(
2173 first_obj, to_space_end, from_space_slide_diff_, moving_space_bitmap_);
2174 if (kMode == kFallbackMode) {
2175 memcpy(to_space_end, to_space_end + from_space_slide_diff_, gPageSize);
2176 }
2177 });
2178 } else {
2179 // The page has no reachable object on it. Just declare it mapped.
2180 // Mutators shouldn't step on this page, which is asserted in the sigbus
2181 // handler.
2182 DCHECK_EQ(moving_pages_status_[idx].load(std::memory_order_relaxed),
2183 static_cast<uint8_t>(PageState::kUnprocessed));
2184 moving_pages_status_[idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
2185 std::memory_order_release);
2186 }
2187 if (FreeFromSpacePages(idx, kMode, end_idx_for_mapping)) {
2188 end_idx_for_mapping = idx;
2189 }
2190 }
2191 // Map one last time to finish anything that is left.
2192 if (kMode == kCopyMode && end_idx_for_mapping > 0) {
2193 MapMovingSpacePages(idx,
2194 end_idx_for_mapping,
2195 /*from_fault=*/false,
2196 /*return_on_contention=*/false,
2197 /*tolerate_enoent=*/false);
2198 }
2199 DCHECK_EQ(to_space_end, bump_pointer_space_->Begin());
2200 }
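// Illustrative sketch (not part of ART): the reverse iteration in CompactMovingSpace()
// above effectively partitions the to-space page indices into three ranges:
//
//   for (size_t idx = page_status_arr_len; idx-- > 0;) {
//     if (idx >= moving_first_objs_count_) {
//       SlideBlackPage(...);        // black-allocated pages: slide verbatim, fix refs
//     } else if (idx >= black_dense_end_idx) {
//       CompactPage(...);           // compacted portion: pack live strides downwards
//     } else {
//       UpdateNonMovingPage(...);   // black-dense prefix: objects stay, only refs updated
//     }
//   }
//
// Processing from the highest index downwards lets FreeFromSpacePages() reclaim
// from-space pages behind the compaction frontier as early as possible.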
2201
2202 size_t MarkCompact::MapMovingSpacePages(size_t start_idx,
2203 size_t arr_len,
2204 bool from_fault,
2205 bool return_on_contention,
2206 bool tolerate_enoent) {
2207 DCHECK_LT(start_idx, arr_len);
2208 size_t arr_idx = start_idx;
2209 bool wait_for_unmapped = false;
2210 while (arr_idx < arr_len) {
2211 size_t map_count = 0;
2212 uint32_t cur_state = moving_pages_status_[arr_idx].load(std::memory_order_acquire);
2213 // Find a contiguous range that can be mapped with a single ioctl.
2214 for (uint32_t i = arr_idx, from_page = cur_state & ~kPageStateMask; i < arr_len;
2215 i++, map_count++, from_page += gPageSize) {
2216 uint32_t s = moving_pages_status_[i].load(std::memory_order_acquire);
2217 uint32_t cur_from_page = s & ~kPageStateMask;
2218 if (GetPageStateFromWord(s) != PageState::kProcessed || cur_from_page != from_page) {
2219 break;
2220 }
2221 }
2222
2223 if (map_count == 0) {
2224 if (from_fault) {
2225 bool mapped = GetPageStateFromWord(cur_state) == PageState::kProcessedAndMapped;
2226 return mapped ? 1 : 0;
2227 }
2228 // Skip the pages that this thread cannot map.
2229 for (; arr_idx < arr_len; arr_idx++) {
2230 PageState s = GetMovingPageState(arr_idx);
2231 if (s == PageState::kProcessed) {
2232 break;
2233 } else if (s != PageState::kProcessedAndMapped) {
2234 wait_for_unmapped = true;
2235 }
2236 }
2237 } else {
2238 uint32_t from_space_offset = cur_state & ~kPageStateMask;
2239 uint8_t* to_space_start = moving_space_begin_ + arr_idx * gPageSize;
2240 uint8_t* from_space_start = from_space_begin_ + from_space_offset;
2241 DCHECK_ALIGNED_PARAM(to_space_start, gPageSize);
2242 DCHECK_ALIGNED_PARAM(from_space_start, gPageSize);
2243 size_t mapped_len = CopyIoctl(to_space_start,
2244 from_space_start,
2245 map_count * gPageSize,
2246 return_on_contention,
2247 tolerate_enoent);
2248 for (size_t l = 0; l < mapped_len; l += gPageSize, arr_idx++) {
2249 // Store is sufficient as anyone storing is doing it with the same value.
2250 moving_pages_status_[arr_idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
2251 std::memory_order_release);
2252 }
2253 if (from_fault) {
2254 return DivideByPageSize(mapped_len);
2255 }
2256 // The COPY ioctl can also return a smaller length if a page was found
2257 // to be already mapped, but that doesn't count as contention.
2258 if (return_on_contention && DivideByPageSize(mapped_len) < map_count && errno != EEXIST) {
2259 return arr_idx - start_idx;
2260 }
2261 }
2262 }
2263 if (wait_for_unmapped) {
2264 for (size_t i = start_idx; i < arr_len; i++) {
2265 PageState s = GetMovingPageState(i);
2266 DCHECK_GT(s, PageState::kProcessed);
2267 uint32_t backoff_count = 0;
2268 while (s != PageState::kProcessedAndMapped) {
2269 BackOff(backoff_count++);
2270 s = GetMovingPageState(i);
2271 }
2272 }
2273 }
2274 return arr_len - start_idx;
2275 }
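// Illustrative sketch (not part of ART): the batching in MapMovingSpacePages() above
// coalesces a run of pages that are all in the kProcessed state *and* contiguous on the
// from-space side, so that one UFFDIO_COPY ioctl maps the whole run. 'idx' and
// 'first_status_word' are hypothetical locals used only for this sketch:
//
//   size_t count = 0;
//   uint32_t expected_from_offset = first_status_word & ~kPageStateMask;
//   while (idx + count < arr_len) {
//     uint32_t s = moving_pages_status_[idx + count].load(std::memory_order_acquire);
//     if (GetPageStateFromWord(s) != PageState::kProcessed ||
//         (s & ~kPageStateMask) != expected_from_offset) {
//       break;   // run ends: wrong state, or from-space pages are no longer contiguous
//     }
//     count++;
//     expected_from_offset += gPageSize;
//   }
//   // CopyIoctl(to_space_start, from_space_start, count * gPageSize, ...);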
2276
2277 void MarkCompact::UpdateNonMovingPage(mirror::Object* first,
2278 uint8_t* page,
2279 ptrdiff_t from_space_diff,
2280 accounting::ContinuousSpaceBitmap* bitmap) {
2281 DCHECK_LT(reinterpret_cast<uint8_t*>(first), page + gPageSize);
2282 // For every object found in the page, visit the previous object. This ensures
2283 // that we can visit without checking the page-end boundary.
2284 // Call VisitRefsForCompaction with the from-space read-barrier, as the klass-object and
2285 // super-class loads require it.
2286 // TODO: Set kVisitNativeRoots to false once we implement concurrent
2287 // compaction
2288 mirror::Object* curr_obj = first;
2289 uint8_t* from_page = page + from_space_diff;
2290 uint8_t* from_page_end = from_page + gPageSize;
2291 bitmap->VisitMarkedRange(
2292 reinterpret_cast<uintptr_t>(first) + mirror::kObjectHeaderSize,
2293 reinterpret_cast<uintptr_t>(page + gPageSize),
2294 [&](mirror::Object* next_obj) {
2295 mirror::Object* from_obj = reinterpret_cast<mirror::Object*>(
2296 reinterpret_cast<uint8_t*>(curr_obj) + from_space_diff);
2297 if (reinterpret_cast<uint8_t*>(curr_obj) < page) {
2298 RefsUpdateVisitor</*kCheckBegin*/ true, /*kCheckEnd*/ false> visitor(
2299 this, from_obj, from_page, from_page_end);
2300 MemberOffset begin_offset(page - reinterpret_cast<uint8_t*>(curr_obj));
2301 // Native roots shouldn't be visited as they are done when this
2302 // object's beginning was visited in the preceding page.
2303 from_obj->VisitRefsForCompaction</*kFetchObjSize*/ false, /*kVisitNativeRoots*/ false>(
2304 visitor, begin_offset, MemberOffset(-1));
2305 } else {
2306 RefsUpdateVisitor</*kCheckBegin*/ false, /*kCheckEnd*/ false> visitor(
2307 this, from_obj, from_page, from_page_end);
2308 from_obj->VisitRefsForCompaction</*kFetchObjSize*/ false>(
2309 visitor, MemberOffset(0), MemberOffset(-1));
2310 }
2311 curr_obj = next_obj;
2312 });
2313
2314 mirror::Object* from_obj =
2315 reinterpret_cast<mirror::Object*>(reinterpret_cast<uint8_t*>(curr_obj) + from_space_diff);
2316 MemberOffset end_offset(page + gPageSize - reinterpret_cast<uint8_t*>(curr_obj));
2317 if (reinterpret_cast<uint8_t*>(curr_obj) < page) {
2318 RefsUpdateVisitor</*kCheckBegin*/ true, /*kCheckEnd*/ true> visitor(
2319 this, from_obj, from_page, from_page_end);
2320 from_obj->VisitRefsForCompaction</*kFetchObjSize*/ false, /*kVisitNativeRoots*/ false>(
2321 visitor, MemberOffset(page - reinterpret_cast<uint8_t*>(curr_obj)), end_offset);
2322 } else {
2323 RefsUpdateVisitor</*kCheckBegin*/ false, /*kCheckEnd*/ true> visitor(
2324 this, from_obj, from_page, from_page_end);
2325 from_obj->VisitRefsForCompaction</*kFetchObjSize*/ false>(visitor, MemberOffset(0), end_offset);
2326 }
2327 }
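// Illustrative sketch (not part of ART): the "visit the previous object" pattern used in
// UpdateNonMovingPage() above. Updating curr_obj only when the *next* marked object is
// found means every object except the last can be visited without an end-boundary check;
// the final object is then visited separately with the page end as the limit:
//
//   mirror::Object* curr_obj = first;
//   bitmap->VisitMarkedRange(first_addr + mirror::kObjectHeaderSize, page_end_addr,
//                            [&](mirror::Object* next_obj) {
//                              VisitNoEndCheck(curr_obj);   // hypothetical helper
//                              curr_obj = next_obj;
//                            });
//   VisitWithEndCheck(curr_obj, page_end_addr);             // hypothetical helper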
2328
2329 void MarkCompact::UpdateNonMovingSpace() {
2330 TimingLogger::ScopedTiming t("(Paused)UpdateNonMovingSpace", GetTimings());
2331 // Iterating in reverse ensures that the class pointer in objects which span
2332 // more than one page gets updated last. This is necessary for
2333 // VisitRefsForCompaction() to work correctly.
2334 // TODO: If and when we make non-moving space update concurrent, implement a
2335 // mechanism to remember class pointers for such objects off-heap and pass it
2336 // to VisitRefsForCompaction().
2337 uint8_t* page = non_moving_space_->Begin() + non_moving_first_objs_count_ * gPageSize;
2338 for (ssize_t i = non_moving_first_objs_count_ - 1; i >= 0; i--) {
2339 mirror::Object* obj = first_objs_non_moving_space_[i].AsMirrorPtr();
2340 page -= gPageSize;
2341 // null means there are no objects on the page whose references need updating.
2342 if (obj != nullptr) {
2343 UpdateNonMovingPage(obj, page, /*from_space_diff=*/0, non_moving_space_bitmap_);
2344 }
2345 }
2346 }
2347
2348 void MarkCompact::UpdateMovingSpaceBlackAllocations() {
2349 // For sliding black pages, we need the first-object, which overlaps with the
2350 // first byte of the page. Additionally, we compute the size of the first chunk of
2351 // black objects. This will suffice for most black pages. Unlike compaction
2352 // pages, here we don't need to pre-compute the offset within the first-obj from
2353 // where sliding has to start. That can be calculated using the pre-compact
2354 // address of the page. Therefore, to save space, we store the first chunk's
2355 // size in the black_alloc_pages_first_chunk_size_ array.
2356 // For the pages which may have holes after the first chunk, which could happen
2357 // if a new TLAB starts in the middle of the page, we mark the objects in
2358 // the mark-bitmap. So, if the first-chunk size is smaller than gPageSize,
2359 // then we use the mark-bitmap for the remainder of the page.
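// Illustrative sketch (not part of ART): the per-page slide offset does not need to be
// stored, because it can be re-derived from the page's pre-compact address, e.g.:
//
//   mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
//   size_t offset = pre_compact_page - reinterpret_cast<uint8_t*>(first_obj);
//   // 'pre_compact_page' is the black page being slid; only the first chunk's size
//   // has to be remembered in black_alloc_pages_first_chunk_size_[idx].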
2360 uint8_t* const begin = bump_pointer_space_->Begin();
2361 uint8_t* black_allocs = black_allocations_begin_;
2362 DCHECK_LE(begin, black_allocs);
2363 size_t consumed_blocks_count = 0;
2364 size_t first_block_size;
2365 // Needed only for a debug check at the end of the function. Hopefully the
2366 // compiler will eliminate it otherwise.
2367 size_t num_blocks = 0;
2368 // Get the list of all blocks allocated in the bump-pointer space.
2369 std::vector<size_t>* block_sizes = bump_pointer_space_->GetBlockSizes(thread_running_gc_,
2370 &first_block_size);
2371 DCHECK_LE(first_block_size, (size_t)(black_allocs - begin));
2372 if (block_sizes != nullptr) {
2373 size_t black_page_idx = moving_first_objs_count_;
2374 uint8_t* block_end = begin + first_block_size;
2375 uint32_t remaining_chunk_size = 0;
2376 uint32_t first_chunk_size = 0;
2377 mirror::Object* first_obj = nullptr;
2378 num_blocks = block_sizes->size();
2379 for (size_t block_size : *block_sizes) {
2380 block_end += block_size;
2381 // Skip the blocks that are prior to the black allocations. These will be
2382 // merged with the main-block later.
2383 if (black_allocs >= block_end) {
2384 consumed_blocks_count++;
2385 continue;
2386 }
2387 mirror::Object* obj = reinterpret_cast<mirror::Object*>(black_allocs);
2388 bool set_mark_bit = remaining_chunk_size > 0;
2389 // We don't know how many objects are allocated in the current block. When we hit
2390 // a null, assume it's the end. This works as every block is expected to
2391 // have objects allocated linearly using bump-pointer.
2392 // BumpPointerSpace::Walk() also works similarly.
2393 while (black_allocs < block_end
2394 && obj->GetClass<kDefaultVerifyFlags, kWithoutReadBarrier>() != nullptr) {
2395 // Try to keep instructions which access class instance together to
2396 // avoid reloading the pointer from object.
2397 size_t obj_size = obj->SizeOf();
2398 bytes_scanned_ += obj_size;
2399 obj_size = RoundUp(obj_size, kAlignment);
2400 UpdateClassAfterObjectMap(obj);
2401 if (first_obj == nullptr) {
2402 first_obj = obj;
2403 }
2404 // We only need the mark-bitmap in the pages wherein a new TLAB starts in
2405 // the middle of the page.
2406 if (set_mark_bit) {
2407 moving_space_bitmap_->Set(obj);
2408 }
2409 // Handle objects which cross the page boundary, including objects larger
2410 // than the page size.
2411 if (remaining_chunk_size + obj_size >= gPageSize) {
2412 set_mark_bit = false;
2413 first_chunk_size += gPageSize - remaining_chunk_size;
2414 remaining_chunk_size += obj_size;
2415 // We should not store first-object and remaining_chunk_size if there were
2416 // unused bytes before this TLAB, in which case we must have already
2417 // stored the values (below).
2418 if (black_alloc_pages_first_chunk_size_[black_page_idx] == 0) {
2419 black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size;
2420 first_objs_moving_space_[black_page_idx].Assign(first_obj);
2421 }
2422 black_page_idx++;
2423 remaining_chunk_size -= gPageSize;
2424 // Consume an object larger than page size.
2425 while (remaining_chunk_size >= gPageSize) {
2426 black_alloc_pages_first_chunk_size_[black_page_idx] = gPageSize;
2427 first_objs_moving_space_[black_page_idx].Assign(obj);
2428 black_page_idx++;
2429 remaining_chunk_size -= gPageSize;
2430 }
2431 first_obj = remaining_chunk_size > 0 ? obj : nullptr;
2432 first_chunk_size = remaining_chunk_size;
2433 } else {
2434 DCHECK_LE(first_chunk_size, remaining_chunk_size);
2435 first_chunk_size += obj_size;
2436 remaining_chunk_size += obj_size;
2437 }
2438 black_allocs += obj_size;
2439 obj = reinterpret_cast<mirror::Object*>(black_allocs);
2440 }
2441 DCHECK_LE(black_allocs, block_end);
2442 DCHECK_LT(remaining_chunk_size, gPageSize);
2443 // consume the unallocated portion of the block
2444 if (black_allocs < block_end) {
2445 // first-chunk of the current page ends here. Store it.
2446 if (first_chunk_size > 0 && black_alloc_pages_first_chunk_size_[black_page_idx] == 0) {
2447 black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size;
2448 first_objs_moving_space_[black_page_idx].Assign(first_obj);
2449 }
2450 first_chunk_size = 0;
2451 first_obj = nullptr;
2452 size_t page_remaining = gPageSize - remaining_chunk_size;
2453 size_t block_remaining = block_end - black_allocs;
2454 if (page_remaining <= block_remaining) {
2455 block_remaining -= page_remaining;
2456 // current page and the subsequent empty pages in the block
2457 black_page_idx += 1 + DivideByPageSize(block_remaining);
2458 remaining_chunk_size = ModuloPageSize(block_remaining);
2459 } else {
2460 remaining_chunk_size += block_remaining;
2461 }
2462 black_allocs = block_end;
2463 }
2464 }
2465 if (black_page_idx < DivideByPageSize(bump_pointer_space_->Size())) {
2466 // Store the leftover first-chunk, if any, and update page index.
2467 if (black_alloc_pages_first_chunk_size_[black_page_idx] > 0) {
2468 black_page_idx++;
2469 } else if (first_chunk_size > 0) {
2470 black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size;
2471 first_objs_moving_space_[black_page_idx].Assign(first_obj);
2472 black_page_idx++;
2473 }
2474 }
2475 black_page_count_ = black_page_idx - moving_first_objs_count_;
2476 delete block_sizes;
2477 }
2478 // Update bump-pointer space by consuming all the pre-black blocks into the
2479 // main one.
2480 bump_pointer_space_->SetBlockSizes(thread_running_gc_,
2481 post_compact_end_ - begin,
2482 consumed_blocks_count);
2483 if (kIsDebugBuild) {
2484 size_t moving_space_size = bump_pointer_space_->Size();
2485 size_t los_size = 0;
2486 if (heap_->GetLargeObjectsSpace()) {
2487 los_size = heap_->GetLargeObjectsSpace()->GetBytesAllocated();
2488 }
2489 // The moving-space size is already updated to post-compact size in SetBlockSizes above.
2490 // Also, bytes-allocated has already been adjusted with the large-object space's freed-bytes
2491 // in Sweep(), but not with moving-space freed-bytes.
2492 CHECK_GE(heap_->GetBytesAllocated() - black_objs_slide_diff_, moving_space_size + los_size)
2493 << " moving-space size:" << moving_space_size
2494 << " moving-space bytes-freed:" << black_objs_slide_diff_
2495 << " large-object-space size:" << los_size
2496 << " large-object-space bytes-freed:" << GetCurrentIteration()->GetFreedLargeObjectBytes()
2497 << " num-tlabs-merged:" << consumed_blocks_count
2498 << " main-block-size:" << (post_compact_end_ - begin)
2499 << " total-tlabs-moving-space:" << num_blocks;
2500 }
2501 }
2502
2503 void MarkCompact::UpdateNonMovingSpaceBlackAllocations() {
2504 accounting::ObjectStack* stack = heap_->GetAllocationStack();
2505 const StackReference<mirror::Object>* limit = stack->End();
2506 uint8_t* const space_begin = non_moving_space_->Begin();
2507 for (StackReference<mirror::Object>* it = stack->Begin(); it != limit; ++it) {
2508 mirror::Object* obj = it->AsMirrorPtr();
2509 if (obj != nullptr && non_moving_space_bitmap_->HasAddress(obj)) {
2510 non_moving_space_bitmap_->Set(obj);
2511 // Clear so that we don't try to set the bit again in the next GC-cycle.
2512 it->Clear();
2513 size_t idx = DivideByPageSize(reinterpret_cast<uint8_t*>(obj) - space_begin);
2514 uint8_t* page_begin = AlignDown(reinterpret_cast<uint8_t*>(obj), gPageSize);
2515 mirror::Object* first_obj = first_objs_non_moving_space_[idx].AsMirrorPtr();
2516 if (first_obj == nullptr
2517 || (obj < first_obj && reinterpret_cast<uint8_t*>(first_obj) > page_begin)) {
2518 first_objs_non_moving_space_[idx].Assign(obj);
2519 }
2520 mirror::Object* next_page_first_obj = first_objs_non_moving_space_[++idx].AsMirrorPtr();
2521 uint8_t* next_page_begin = page_begin + gPageSize;
2522 if (next_page_first_obj == nullptr
2523 || reinterpret_cast<uint8_t*>(next_page_first_obj) > next_page_begin) {
2524 size_t obj_size = RoundUp(obj->SizeOf<kDefaultVerifyFlags>(), kAlignment);
2525 uint8_t* obj_end = reinterpret_cast<uint8_t*>(obj) + obj_size;
2526 while (next_page_begin < obj_end) {
2527 first_objs_non_moving_space_[idx++].Assign(obj);
2528 next_page_begin += gPageSize;
2529 }
2530 }
2531 // Update the first_objs count in case we went past non_moving_first_objs_count_.
2532 non_moving_first_objs_count_ = std::max(non_moving_first_objs_count_, idx);
2533 }
2534 }
2535 }
2536
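// Visitor used during the compaction pause to update references held by objects in
// immune spaces (image and zygote spaces) so that they point to post-compact addresses
// in the moving space. Invoked either through mod-union tables or by scanning dirty cards.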
2537 class MarkCompact::ImmuneSpaceUpdateObjVisitor {
2538 public:
ImmuneSpaceUpdateObjVisitor(MarkCompact * collector)2539 explicit ImmuneSpaceUpdateObjVisitor(MarkCompact* collector) : collector_(collector) {}
2540
operator ()(mirror::Object * obj) const2541 void operator()(mirror::Object* obj) const ALWAYS_INLINE REQUIRES(Locks::mutator_lock_) {
2542 RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false> visitor(collector_,
2543 obj,
2544 /*begin_*/nullptr,
2545 /*end_*/nullptr);
2546 obj->VisitRefsForCompaction</*kFetchObjSize*/ false>(
2547 visitor, MemberOffset(0), MemberOffset(-1));
2548 }
2549
Callback(mirror::Object * obj,void * arg)2550 static void Callback(mirror::Object* obj, void* arg) REQUIRES(Locks::mutator_lock_) {
2551 reinterpret_cast<ImmuneSpaceUpdateObjVisitor*>(arg)->operator()(obj);
2552 }
2553
2554 private:
2555 MarkCompact* const collector_;
2556 };
2557
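// Visits class-loader class-tables and updates their GC roots (classes themselves are
// skipped as they are updated concurrently) to post-compact addresses via UpdateRoot().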
2558 class MarkCompact::ClassLoaderRootsUpdater : public ClassLoaderVisitor {
2559 public:
ClassLoaderRootsUpdater(MarkCompact * collector)2560 explicit ClassLoaderRootsUpdater(MarkCompact* collector)
2561 : collector_(collector),
2562 moving_space_begin_(collector->black_dense_end_),
2563 moving_space_end_(collector->moving_space_end_) {}
2564
Visit(ObjPtr<mirror::ClassLoader> class_loader)2565 void Visit(ObjPtr<mirror::ClassLoader> class_loader) override
2566 REQUIRES_SHARED(Locks::classlinker_classes_lock_, Locks::mutator_lock_) {
2567 ClassTable* const class_table = class_loader->GetClassTable();
2568 if (class_table != nullptr) {
2569 // Classes are updated concurrently.
2570 class_table->VisitRoots(*this, /*skip_classes=*/true);
2571 }
2572 }
2573
VisitRootIfNonNull(mirror::CompressedReference<mirror::Object> * root) const2574 void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const ALWAYS_INLINE
2575 REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
2576 if (!root->IsNull()) {
2577 VisitRoot(root);
2578 }
2579 }
2580
VisitRoot(mirror::CompressedReference<mirror::Object> * root) const2581 void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const ALWAYS_INLINE
2582 REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
2583 collector_->UpdateRoot(
2584 root, moving_space_begin_, moving_space_end_, RootInfo(RootType::kRootVMInternal));
2585 }
2586
2587 private:
2588 MarkCompact* collector_;
2589 uint8_t* const moving_space_begin_;
2590 uint8_t* const moving_space_end_;
2591 };
2592
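// Updates GC roots embedded in a single linear-alloc page. Multi-object arena pages are
// walked object-by-object using their TrackingHeader, while single-object arena pages
// (e.g. class-table/intern-table backing memory) are treated as a packed array of GC roots.
// Records whether the page contained anything to update so that callers can map a
// zero-page instead of copying an untouched shadow page.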
2593 class MarkCompact::LinearAllocPageUpdater {
2594 public:
LinearAllocPageUpdater(MarkCompact * collector)2595 explicit LinearAllocPageUpdater(MarkCompact* collector)
2596 : collector_(collector),
2597 moving_space_begin_(collector->black_dense_end_),
2598 moving_space_end_(collector->moving_space_end_),
2599 last_page_touched_(false) {}
2600
2601 // Update a page in multi-object arena.
MultiObjectArena(uint8_t * page_begin,uint8_t * first_obj)2602 void MultiObjectArena(uint8_t* page_begin, uint8_t* first_obj)
2603 REQUIRES_SHARED(Locks::mutator_lock_) {
2604 DCHECK(first_obj != nullptr);
2605 DCHECK_ALIGNED_PARAM(page_begin, gPageSize);
2606 uint8_t* page_end = page_begin + gPageSize;
2607 uint32_t obj_size;
2608 for (uint8_t* byte = first_obj; byte < page_end;) {
2609 TrackingHeader* header = reinterpret_cast<TrackingHeader*>(byte);
2610 obj_size = header->GetSize();
2611 if (UNLIKELY(obj_size == 0)) {
2612 // No more objects in this page to visit.
2613 last_page_touched_ = byte >= page_begin;
2614 return;
2615 }
2616 uint8_t* obj = byte + sizeof(TrackingHeader);
2617 uint8_t* obj_end = byte + obj_size;
2618 if (header->Is16Aligned()) {
2619 obj = AlignUp(obj, 16);
2620 }
2621 uint8_t* begin_boundary = std::max(obj, page_begin);
2622 uint8_t* end_boundary = std::min(obj_end, page_end);
2623 if (begin_boundary < end_boundary) {
2624 VisitObject(header->GetKind(), obj, begin_boundary, end_boundary);
2625 }
2626 if (ArenaAllocator::IsRunningOnMemoryTool()) {
2627 obj_size += ArenaAllocator::kMemoryToolRedZoneBytes;
2628 }
2629 byte += RoundUp(obj_size, LinearAlloc::kAlignment);
2630 }
2631 last_page_touched_ = true;
2632 }
2633
2634 // This version is only used for cases where the entire page is filled with
2635 // GC-roots. For example, class-table and intern-table.
SingleObjectArena(uint8_t * page_begin,size_t page_size)2636 void SingleObjectArena(uint8_t* page_begin, size_t page_size)
2637 REQUIRES_SHARED(Locks::mutator_lock_) {
2638 static_assert(sizeof(uint32_t) == sizeof(GcRoot<mirror::Object>));
2639 DCHECK_ALIGNED(page_begin, kAlignment);
2640 // Least significant bits are used by class-table.
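// For example (assuming kObjectAlignment == 8, so kMask == 0x7): a stored word
// 0x12345673 splits into the reference 0x12345670 and the low bits 0x3; only the
// reference part is updated and the low bits are OR-ed back in below.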
2641 static constexpr uint32_t kMask = kObjectAlignment - 1;
2642 size_t num_roots = page_size / sizeof(GcRoot<mirror::Object>);
2643 uint32_t* root_ptr = reinterpret_cast<uint32_t*>(page_begin);
2644 for (size_t i = 0; i < num_roots; root_ptr++, i++) {
2645 uint32_t word = *root_ptr;
2646 if (word != 0) {
2647 uint32_t lsbs = word & kMask;
2648 word &= ~kMask;
2649 VisitRootIfNonNull(reinterpret_cast<mirror::CompressedReference<mirror::Object>*>(&word));
2650 *root_ptr = word | lsbs;
2651 last_page_touched_ = true;
2652 }
2653 }
2654 }
2655
WasLastPageTouched() const2656 bool WasLastPageTouched() const { return last_page_touched_; }
2657
VisitRootIfNonNull(mirror::CompressedReference<mirror::Object> * root) const2658 void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
2659 ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) {
2660 if (!root->IsNull()) {
2661 VisitRoot(root);
2662 }
2663 }
2664
VisitRoot(mirror::CompressedReference<mirror::Object> * root) const2665 void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
2666 ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) {
2667 mirror::Object* old_ref = root->AsMirrorPtr();
2668 DCHECK_NE(old_ref, nullptr);
2669 if (MarkCompact::HasAddress(old_ref, moving_space_begin_, moving_space_end_)) {
2670 mirror::Object* new_ref = old_ref;
2671 if (reinterpret_cast<uint8_t*>(old_ref) >= collector_->black_allocations_begin_) {
2672 new_ref = collector_->PostCompactBlackObjAddr(old_ref);
2673 } else if (collector_->live_words_bitmap_->Test(old_ref)) {
2674 DCHECK(collector_->moving_space_bitmap_->Test(old_ref))
2675 << "ref:" << old_ref << " root:" << root;
2676 new_ref = collector_->PostCompactOldObjAddr(old_ref);
2677 }
2678 if (old_ref != new_ref) {
2679 root->Assign(new_ref);
2680 }
2681 }
2682 }
2683
2684 private:
VisitObject(LinearAllocKind kind,void * obj,uint8_t * start_boundary,uint8_t * end_boundary) const2685 void VisitObject(LinearAllocKind kind,
2686 void* obj,
2687 uint8_t* start_boundary,
2688 uint8_t* end_boundary) const ALWAYS_INLINE
2689 REQUIRES_SHARED(Locks::mutator_lock_) {
2690 switch (kind) {
2691 case LinearAllocKind::kNoGCRoots:
2692 break;
2693 case LinearAllocKind::kGCRootArray:
2694 {
2695 GcRoot<mirror::Object>* root = reinterpret_cast<GcRoot<mirror::Object>*>(start_boundary);
2696 GcRoot<mirror::Object>* last = reinterpret_cast<GcRoot<mirror::Object>*>(end_boundary);
2697 for (; root < last; root++) {
2698 VisitRootIfNonNull(root->AddressWithoutBarrier());
2699 }
2700 }
2701 break;
2702 case LinearAllocKind::kArtMethodArray:
2703 {
2704 LengthPrefixedArray<ArtMethod>* array = static_cast<LengthPrefixedArray<ArtMethod>*>(obj);
2705 // Old methods are clobbered in debug builds. Check the size to confirm whether the array
2706 // has any GC roots to visit. See ClassLinker::LinkMethodsHelper::ClobberOldMethods().
2707 if (array->size() > 0) {
2708 if (collector_->pointer_size_ == PointerSize::k64) {
2709 ArtMethod::VisitArrayRoots<PointerSize::k64>(
2710 *this, start_boundary, end_boundary, array);
2711 } else {
2712 DCHECK_EQ(collector_->pointer_size_, PointerSize::k32);
2713 ArtMethod::VisitArrayRoots<PointerSize::k32>(
2714 *this, start_boundary, end_boundary, array);
2715 }
2716 }
2717 }
2718 break;
2719 case LinearAllocKind::kArtMethod:
2720 ArtMethod::VisitRoots(*this, start_boundary, end_boundary, static_cast<ArtMethod*>(obj));
2721 break;
2722 case LinearAllocKind::kArtFieldArray:
2723 ArtField::VisitArrayRoots(*this,
2724 start_boundary,
2725 end_boundary,
2726 static_cast<LengthPrefixedArray<ArtField>*>(obj));
2727 break;
2728 case LinearAllocKind::kDexCacheArray:
2729 {
2730 mirror::DexCachePair<mirror::Object>* first =
2731 reinterpret_cast<mirror::DexCachePair<mirror::Object>*>(start_boundary);
2732 mirror::DexCachePair<mirror::Object>* last =
2733 reinterpret_cast<mirror::DexCachePair<mirror::Object>*>(end_boundary);
2734 mirror::DexCache::VisitDexCachePairRoots(*this, first, last);
2735 }
2736 }
2737 }
2738
2739 MarkCompact* const collector_;
2740 // Cache to speed up checking if GC-root is in moving space or not.
2741 uint8_t* const moving_space_begin_;
2742 uint8_t* const moving_space_end_;
2743 // Whether the last page was touched or not.
2744 bool last_page_touched_ = false;
2745 };
2746
UpdateClassTableClasses(Runtime * runtime,bool immune_class_table_only)2747 void MarkCompact::UpdateClassTableClasses(Runtime* runtime, bool immune_class_table_only) {
2748 // If the process is debuggable then redefinition is allowed, which may mean that
2749 // pre-zygote-fork class-tables have pointers to classes in the moving-space.
2750 // So visit classes from class-sets that are not in linear-alloc arena-pool.
2751 if (UNLIKELY(runtime->IsJavaDebuggableAtInit())) {
2752 ClassLinker* linker = runtime->GetClassLinker();
2753 ClassLoaderRootsUpdater updater(this);
2754 GcVisitedArenaPool* pool = static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool());
2755 auto cond = [this, pool, immune_class_table_only](ClassTable::ClassSet& set) -> bool {
2756 if (!set.empty()) {
2757 return immune_class_table_only ?
2758 immune_spaces_.ContainsObject(reinterpret_cast<mirror::Object*>(&*set.begin())) :
2759 !pool->Contains(reinterpret_cast<void*>(&*set.begin()));
2760 }
2761 return false;
2762 };
2763 linker->VisitClassTables([cond, &updater](ClassTable* table)
2764 REQUIRES_SHARED(Locks::mutator_lock_) {
2765 table->VisitClassesIfConditionMet(cond, updater);
2766 });
2767 ReaderMutexLock rmu(thread_running_gc_, *Locks::classlinker_classes_lock_);
2768 linker->GetBootClassTable()->VisitClassesIfConditionMet(cond, updater);
2769 }
2770 }
2771
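// The single stop-the-world pause of the compaction phase. It catches up on black
// allocations made since the marking pause, updates all roots, immune spaces and
// system weaks to post-compact addresses, registers the moving space and linear-alloc
// spaces with userfaultfd (via KernelPreparation()), and, in fallback mode, performs
// the whole compaction right here in the pause.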
CompactionPause()2772 void MarkCompact::CompactionPause() {
2773 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
2774 Runtime* runtime = Runtime::Current();
2775 non_moving_space_bitmap_ = non_moving_space_->GetLiveBitmap();
2776 if (kIsDebugBuild) {
2777 DCHECK_EQ(thread_running_gc_, Thread::Current());
2778 // TODO(Simulator): Test that this should not operate on the simulated stack when the simulator
2779 // supports mark compact.
2780 stack_low_addr_ = thread_running_gc_->GetStackEnd<kNativeStackType>();
2781 stack_high_addr_ = reinterpret_cast<char*>(stack_low_addr_)
2782 + thread_running_gc_->GetUsableStackSize<kNativeStackType>();
2783 }
2784 {
2785 TimingLogger::ScopedTiming t2("(Paused)UpdateCompactionDataStructures", GetTimings());
2786 ReaderMutexLock rmu(thread_running_gc_, *Locks::heap_bitmap_lock_);
2787 // Refresh data-structures to catch-up on allocations that may have
2788 // happened since marking-phase pause.
2789 // There could be several TLABs that got allocated since marking pause. We
2790 // don't want to compact them and instead update the TLAB info in TLS and
2791 // let mutators continue to use the TLABs.
2792 // We need to set all the bits in live-words bitmap corresponding to allocated
2793 // objects. Also, we need to find the objects that are overlapping with
2794 // page-begin boundaries. Unlike objects allocated before
2795 // black_allocations_begin_, which can be identified via mark-bitmap, we can get
2796 // this info only via walking the space past black_allocations_begin_, which
2797 // involves fetching object size.
2798 // TODO: We can reduce the time spent on this in a pause by performing one
2799 // round of this concurrently prior to the pause.
2800 UpdateMovingSpaceBlackAllocations();
2801 // Iterate over the allocation_stack_, for every object in the non-moving
2802 // space:
2803 // 1. Mark the object in live bitmap
2804 // 2. Erase the object from allocation stack
2805 // 3. In the corresponding page, if the first-object vector needs updating
2806 // then do so.
2807 UpdateNonMovingSpaceBlackAllocations();
2808 // This store is visible to mutator (or uffd worker threads) as the mutator
2809 // lock's unlock guarantees that.
2810 compacting_ = true;
2811 // Start updating roots and system weaks now.
2812 heap_->GetReferenceProcessor()->UpdateRoots(this);
2813 }
2814 {
2815 // TODO: Immune-space updating has to happen either before or after
2816 // remapping pre-compact pages to from-space. And depending on when it's
2817 // done, we have to invoke VisitRefsForCompaction() with or without
2818 // read-barrier.
2819 TimingLogger::ScopedTiming t2("(Paused)UpdateImmuneSpaces", GetTimings());
2820 accounting::CardTable* const card_table = heap_->GetCardTable();
2821 for (auto& space : immune_spaces_.GetSpaces()) {
2822 DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
2823 accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
2824 accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
2825 // Having zygote-space indicates that the first zygote fork has taken
2826 // place and that the classes/dex-caches in immune-spaces may have allocations
2827 // (ArtMethod/ArtField arrays, dex-cache array, etc.) in the
2828 // non-userfaultfd visited private-anonymous mappings. Visit them here.
2829 ImmuneSpaceUpdateObjVisitor visitor(this);
2830 if (table != nullptr) {
2831 table->ProcessCards();
2832 table->VisitObjects(ImmuneSpaceUpdateObjVisitor::Callback, &visitor);
2833 } else {
2834 WriterMutexLock wmu(thread_running_gc_, *Locks::heap_bitmap_lock_);
2835 card_table->Scan<false>(
2836 live_bitmap,
2837 space->Begin(),
2838 space->Limit(),
2839 visitor,
2840 accounting::CardTable::kCardDirty - 1);
2841 }
2842 }
2843 }
2844
2845 {
2846 TimingLogger::ScopedTiming t2("(Paused)UpdateRoots", GetTimings());
2847 runtime->VisitConcurrentRoots(this, kVisitRootFlagAllRoots);
2848 runtime->VisitNonThreadRoots(this);
2849 {
2850 ClassLinker* linker = runtime->GetClassLinker();
2851 ClassLoaderRootsUpdater updater(this);
2852 ReaderMutexLock rmu(thread_running_gc_, *Locks::classlinker_classes_lock_);
2853 linker->VisitClassLoaders(&updater);
2854 linker->GetBootClassTable()->VisitRoots(updater, /*skip_classes=*/true);
2855 }
2856 SweepSystemWeaks(thread_running_gc_, runtime, /*paused=*/true);
2857
2858 bool has_zygote_space = heap_->HasZygoteSpace();
2859 GcVisitedArenaPool* arena_pool =
2860 static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool());
2861 // Update immune/pre-zygote class-tables in case class redefinition took
2862 // place. Pre-zygote class-tables that are not in immune spaces are updated
2863 // below if we are in fallback-mode or if there is no zygote space. So in
2864 // that case only visit class-tables that are in immune-spaces.
2865 UpdateClassTableClasses(runtime, uffd_ == kFallbackMode || !has_zygote_space);
2866
2867 // Acquire arena-pool's lock, which should be released after the pool is
2868 // userfaultfd registered. This is to ensure that no new arenas are
2869 // allocated and used in between; otherwise, since they would not be captured in
2870 // linear_alloc_arenas_ below, we would miss updating their pages. The same
2871 // reason also applies to new allocations within the existing arena which
2872 // may change last_byte.
2873 // Since we are in a STW pause, this shouldn't happen anyway, but holding
2874 // the lock confirms it.
2875 // TODO (b/305779657): Replace with ExclusiveTryLock() and assert that it
2876 // doesn't fail once it is available for ReaderWriterMutex.
2877 WriterMutexLock pool_wmu(thread_running_gc_, arena_pool->GetLock());
2878
2879 // TODO: Find out why it's not sufficient to visit native roots of immune
2880 // spaces, and why all the pre-zygote fork arenas have to be linearly updated.
2881 // Is it possible that some native root starts getting pointed to by some object
2882 // in moving space after fork? Or are we missing a write-barrier somewhere
2883 // when a native root is updated?
2884 auto arena_visitor = [this](uint8_t* page_begin, uint8_t* first_obj, size_t page_size)
2885 REQUIRES_SHARED(Locks::mutator_lock_) {
2886 LinearAllocPageUpdater updater(this);
2887 if (first_obj != nullptr) {
2888 updater.MultiObjectArena(page_begin, first_obj);
2889 } else {
2890 updater.SingleObjectArena(page_begin, page_size);
2891 }
2892 };
2893 if (uffd_ == kFallbackMode || (!has_zygote_space && runtime->IsZygote())) {
2894 // Besides fallback-mode, visit linear-alloc space in the pause for zygote
2895 // processes prior to first fork (that's when zygote space gets created).
2896 if (kIsDebugBuild && IsValidFd(uffd_)) {
2897 // All arenas allocated so far are expected to be pre-zygote fork.
2898 arena_pool->ForEachAllocatedArena(
2899 [](const TrackedArena& arena)
2900 REQUIRES_SHARED(Locks::mutator_lock_) { CHECK(arena.IsPreZygoteForkArena()); });
2901 }
2902 arena_pool->VisitRoots(arena_visitor);
2903 } else {
2904 // Inform the arena-pool that compaction is going on. So the TrackedArena
2905 // objects corresponding to the arenas that are freed shouldn't be deleted
2906 // immediately. We will do that in FinishPhase(). This is to avoid ABA
2907 // problem.
2908 arena_pool->DeferArenaFreeing();
2909 arena_pool->ForEachAllocatedArena(
2910 [this, arena_visitor, has_zygote_space](const TrackedArena& arena)
2911 REQUIRES_SHARED(Locks::mutator_lock_) {
2912 // The pre-zygote fork arenas are not visited concurrently in the
2913 // zygote children processes. The native roots of the dirty objects
2914 // are visited during immune space visit below.
2915 if (!arena.IsPreZygoteForkArena()) {
2916 uint8_t* last_byte = arena.GetLastUsedByte();
2917 auto ret = linear_alloc_arenas_.insert({&arena, last_byte});
2918 CHECK(ret.second);
2919 } else if (!arena.IsSingleObjectArena() || !has_zygote_space) {
2920 // Pre-zygote class-table and intern-table don't need to be updated.
2921 // TODO: Explore the possibility of using /proc/self/pagemap to
2922 // fetch which pages in these arenas are private-dirty and then only
2923 // visit those pages. To optimize it further, we can keep all
2924 // pre-zygote arenas in a single memory range so that just one read
2925 // from pagemap is sufficient.
2926 arena.VisitRoots(arena_visitor);
2927 }
2928 });
2929 }
2930 // Release order wrt mutator threads' SIGBUS handler load.
2931 sigbus_in_progress_count_[0].store(0, std::memory_order_relaxed);
2932 sigbus_in_progress_count_[1].store(0, std::memory_order_release);
2933 KernelPreparation();
2934 }
2935
2936 UpdateNonMovingSpace();
2937 // In fallback mode, compact the moving space right here in the pause.
2938 if (uffd_ == kFallbackMode) {
2939 CompactMovingSpace<kFallbackMode>(nullptr);
2940
2941 int32_t freed_bytes = black_objs_slide_diff_;
2942 bump_pointer_space_->RecordFree(freed_objects_, freed_bytes);
2943 RecordFree(ObjectBytePair(freed_objects_, freed_bytes));
2944 } else {
2945 DCHECK_EQ(compaction_buffer_counter_.load(std::memory_order_relaxed), 1);
2946 }
2947 stack_low_addr_ = nullptr;
2948 }
2949
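// mremap the current contents of [to_addr, to_addr + map_size) over to from_addr so
// that compaction can read the old contents from there, leaving to_addr either as an
// empty mapping (with MREMAP_DONTUNMAP) or re-mmapped as a fresh anonymous mapping,
// ready to be userfaultfd-registered and filled on demand.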
KernelPrepareRangeForUffd(uint8_t * to_addr,uint8_t * from_addr,size_t map_size)2950 void MarkCompact::KernelPrepareRangeForUffd(uint8_t* to_addr, uint8_t* from_addr, size_t map_size) {
2951 int mremap_flags = MREMAP_MAYMOVE | MREMAP_FIXED;
2952 if (gHaveMremapDontunmap) {
2953 mremap_flags |= MREMAP_DONTUNMAP;
2954 }
2955
2956 void* ret = mremap(to_addr, map_size, map_size, mremap_flags, from_addr);
2957 CHECK_EQ(ret, static_cast<void*>(from_addr))
2958 << "mremap to move pages failed: " << strerror(errno)
2959 << ". space-addr=" << reinterpret_cast<void*>(to_addr) << " size=" << PrettySize(map_size);
2960
2961 if (!gHaveMremapDontunmap) {
2962 // Without MREMAP_DONTUNMAP the source mapping is unmapped by mremap. So mmap
2963 // the moving space again.
2964 int mmap_flags = MAP_FIXED;
2965 // Use MAP_FIXED_NOREPLACE so that if someone else reserves the 'to_addr'
2966 // mapping in the meantime (which can happen when MREMAP_DONTUNMAP isn't
2967 // available), we fail instead of unmapping someone else's mapping and then
2968 // causing crashes elsewhere.
2969 mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE;
2970 ret = mmap(to_addr, map_size, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
2971 CHECK_EQ(ret, static_cast<void*>(to_addr))
2972 << "mmap for moving space failed: " << strerror(errno);
2973 }
2974 }
2975
KernelPreparation()2976 void MarkCompact::KernelPreparation() {
2977 TimingLogger::ScopedTiming t("(Paused)KernelPreparation", GetTimings());
2978 uint8_t* moving_space_begin = bump_pointer_space_->Begin();
2979 size_t moving_space_size = bump_pointer_space_->Capacity();
2980 size_t moving_space_register_sz = (moving_first_objs_count_ + black_page_count_) * gPageSize;
2981 DCHECK_LE(moving_space_register_sz, moving_space_size);
2982
2983 KernelPrepareRangeForUffd(moving_space_begin, from_space_begin_, moving_space_size);
2984
2985 if (IsValidFd(uffd_)) {
2986 if (moving_space_register_sz > 0) {
2987 // mremap clears 'anon_vma' field of anonymous mappings. If we
2988 // uffd-register only the used portion of the space, then the vma gets
2989 // split (between used and unused portions) and as soon as pages are
2990 // mapped to the vmas, they get different `anon_vma` assigned, which
2991 // ensures that the two vmas cannot merge after we uffd-unregister the
2992 // used portion. OTOH, registering the entire space avoids the split, but
2993 // unnecessarily causes userfaults on allocations.
2994 // By faulting-in a page we force the kernel to allocate 'anon_vma' *before*
2995 // the vma-split in uffd-register. This ensures that when we unregister
2996 // the used portion after compaction, the two split vmas merge. This is
2997 // necessary for the mremap of the next GC cycle to not fail due to having
2998 // more than one vma in the source range.
2999 //
3000 // Fault in address aligned to PMD size so that in case THP is enabled,
3001 // we don't mistakenly fault in a page in the beginning portion that will be
3002 // registered with uffd. If the alignment takes us beyond the space, then
3003 // fault the first page and madvise it.
3004 size_t pmd_size = Heap::GetPMDSize();
3005 uint8_t* fault_in_addr = AlignUp(moving_space_begin + moving_space_register_sz, pmd_size);
3006 if (bump_pointer_space_->Contains(reinterpret_cast<mirror::Object*>(fault_in_addr))) {
3007 *const_cast<volatile uint8_t*>(fault_in_addr) = 0;
3008 } else {
3009 DCHECK_ALIGNED_PARAM(moving_space_begin, gPageSize);
3010 *const_cast<volatile uint8_t*>(moving_space_begin) = 0;
3011 madvise(moving_space_begin, pmd_size, MADV_DONTNEED);
3012 }
3013 // Register the moving space with userfaultfd.
3014 RegisterUffd(moving_space_begin, moving_space_register_sz);
3015 // madvise ensures that if any page gets mapped (only possible if some
3016 // thread is reading the page(s) without trying to make sense of the contents,
3017 // as we hold mutator-lock exclusively) between mremap and uffd-registration, then
3018 // it gets zapped so that the map is empty and ready for userfaults. If
3019 // we could mremap after uffd-registration (like in case of linear-alloc
3020 // space below) then we wouldn't need it. But since we don't register the
3021 // entire space, we can't do that.
3022 madvise(moving_space_begin, moving_space_register_sz, MADV_DONTNEED);
3023 }
3024 // Prepare linear-alloc for concurrent compaction.
3025 for (auto& data : linear_alloc_spaces_data_) {
3026 DCHECK_EQ(static_cast<ssize_t>(data.shadow_.Size()), data.end_ - data.begin_);
3027 // There could be threads running in suspended mode when the compaction
3028 // pause is being executed. In order to make the userfaultfd setup atomic,
3029 // the registration has to be done *before* moving the pages to shadow map.
3030 RegisterUffd(data.begin_, data.shadow_.Size());
3031 KernelPrepareRangeForUffd(data.begin_, data.shadow_.Begin(), data.shadow_.Size());
3032 }
3033 }
3034 }
3035
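// Entry point from the runtime's SIGBUS handler. Userfaultfd (in SIGBUS mode) raises
// BUS_ADRERR for accesses to registered-but-missing pages; this routine determines
// whether the faulting address lies in the moving space or a linear-alloc space and
// processes/maps that page, returning false for faults it does not own. The two
// sigbus_in_progress_count_ counters track handlers still in flight so that the
// GC thread can wait for them before unregistering spaces (see CompactionPhase()).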
SigbusHandler(siginfo_t * info)3036 bool MarkCompact::SigbusHandler(siginfo_t* info) {
3037 class ScopedInProgressCount {
3038 public:
3039 explicit ScopedInProgressCount(MarkCompact* collector) : collector_(collector) {
3040 // Increment the count only if compaction is not done yet.
3041 for (idx_ = 0; idx_ < 2; idx_++) {
3042 SigbusCounterType prev =
3043 collector_->sigbus_in_progress_count_[idx_].load(std::memory_order_relaxed);
3044 while ((prev & kSigbusCounterCompactionDoneMask) == 0) {
3045 if (collector_->sigbus_in_progress_count_[idx_].compare_exchange_strong(
3046 prev, prev + 1, std::memory_order_acquire)) {
3047 DCHECK_LT(prev, kSigbusCounterCompactionDoneMask - 1);
3048 return;
3049 }
3050 }
3051 }
3052 }
3053
3054 bool TolerateEnoent() const { return idx_ == 1; }
3055
3056 bool IsCompactionDone() const { return idx_ == 2; }
3057
3058 ~ScopedInProgressCount() {
3059 if (idx_ < 2) {
3060 collector_->sigbus_in_progress_count_[idx_].fetch_sub(1, std::memory_order_release);
3061 }
3062 }
3063
3064 private:
3065 MarkCompact* const collector_;
3066 uint8_t idx_;
3067 };
3068
3069 if (info->si_code != BUS_ADRERR) {
3070 // Userfaultfd raises SIGBUS with BUS_ADRERR. All other causes can't be
3071 // handled here.
3072 return false;
3073 }
3074
3075 ScopedInProgressCount spc(this);
3076 uint8_t* fault_page = AlignDown(reinterpret_cast<uint8_t*>(info->si_addr), gPageSize);
3077 if (!spc.IsCompactionDone()) {
3078 if (HasAddress(reinterpret_cast<mirror::Object*>(fault_page))) {
3079 Thread* self = Thread::Current();
3080 Locks::mutator_lock_->AssertSharedHeld(self);
3081 size_t nr_moving_space_used_pages = moving_first_objs_count_ + black_page_count_;
3082 ConcurrentlyProcessMovingPage(fault_page,
3083 self->GetThreadLocalGcBuffer(),
3084 nr_moving_space_used_pages,
3085 spc.TolerateEnoent());
3086 return true;
3087 } else {
3088 // Find the linear-alloc space containing fault-addr
3089 for (auto& data : linear_alloc_spaces_data_) {
3090 if (data.begin_ <= fault_page && data.end_ > fault_page) {
3091 ConcurrentlyProcessLinearAllocPage(fault_page, spc.TolerateEnoent());
3092 return true;
3093 }
3094 }
3095 // Fault address doesn't belong to either moving-space or linear-alloc.
3096 return false;
3097 }
3098 } else {
3099 // We may spuriously get a SIGBUS fault which was initiated before the
3100 // compaction was finished, but ends up here. In that case, if the fault
3101 // address is valid then consider it handled.
3102 return HasAddress(reinterpret_cast<mirror::Object*>(fault_page)) ||
3103 linear_alloc_spaces_data_.end() !=
3104 std::find_if(linear_alloc_spaces_data_.begin(),
3105 linear_alloc_spaces_data_.end(),
3106 [fault_page](const LinearAllocSpaceData& data) {
3107 return data.begin_ <= fault_page && data.end_ > fault_page;
3108 });
3109 }
3110 }
3111
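// Handles a userfault on a moving-space page. Depending on the page's status this
// either installs zero-pages (for the unused portion or the faulting thread's remaining
// TLAB), compacts or slides the page contents into a per-thread buffer and maps it with
// a copy ioctl, maps an already-processed page, or backs off while another thread
// finishes working on the same page.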
ConcurrentlyProcessMovingPage(uint8_t * fault_page,uint8_t * buf,size_t nr_moving_space_used_pages,bool tolerate_enoent)3112 void MarkCompact::ConcurrentlyProcessMovingPage(uint8_t* fault_page,
3113 uint8_t* buf,
3114 size_t nr_moving_space_used_pages,
3115 bool tolerate_enoent) {
3116 Thread* self = Thread::Current();
3117 uint8_t* unused_space_begin =
3118 bump_pointer_space_->Begin() + nr_moving_space_used_pages * gPageSize;
3119 DCHECK(IsAlignedParam(unused_space_begin, gPageSize));
3120 if (fault_page >= unused_space_begin) {
3121 // There is a race which allows more than one thread to install a
3122 // zero-page. But we can tolerate that. So absorb the EEXIST returned by
3123 // the ioctl and move on.
3124 ZeropageIoctl(fault_page, gPageSize, /*tolerate_eexist=*/true, tolerate_enoent);
3125 return;
3126 }
3127 size_t page_idx = DivideByPageSize(fault_page - bump_pointer_space_->Begin());
3128 DCHECK_LT(page_idx, moving_first_objs_count_ + black_page_count_);
3129 mirror::Object* first_obj = first_objs_moving_space_[page_idx].AsMirrorPtr();
3130 if (first_obj == nullptr) {
3131 DCHECK_GT(fault_page, post_compact_end_);
3132 // Install zero-page in the entire remaining tlab to avoid multiple ioctl invocations.
3133 uint8_t* end = AlignDown(self->GetTlabEnd(), gPageSize);
3134 if (fault_page < self->GetTlabStart() || fault_page >= end) {
3135 end = fault_page + gPageSize;
3136 }
3137 size_t end_idx = page_idx + DivideByPageSize(end - fault_page);
3138 size_t length = 0;
3139 for (size_t idx = page_idx; idx < end_idx; idx++, length += gPageSize) {
3140 uint32_t cur_state = moving_pages_status_[idx].load(std::memory_order_acquire);
3141 if (cur_state != static_cast<uint8_t>(PageState::kUnprocessed)) {
3142 DCHECK_EQ(cur_state, static_cast<uint8_t>(PageState::kProcessedAndMapped));
3143 break;
3144 }
3145 }
3146 if (length > 0) {
3147 length = ZeropageIoctl(fault_page, length, /*tolerate_eexist=*/true, tolerate_enoent);
3148 for (size_t len = 0, idx = page_idx; len < length; idx++, len += gPageSize) {
3149 moving_pages_status_[idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
3150 std::memory_order_release);
3151 }
3152 }
3153 return;
3154 }
3155
3156 uint32_t raw_state = moving_pages_status_[page_idx].load(std::memory_order_acquire);
3157 uint32_t backoff_count = 0;
3158 PageState state;
3159 while (true) {
3160 state = GetPageStateFromWord(raw_state);
3161 if (state == PageState::kProcessing || state == PageState::kMutatorProcessing ||
3162 state == PageState::kProcessingAndMapping || state == PageState::kProcessedAndMapping) {
3163 // Wait for the page to be mapped (by gc-thread or some mutator) before returning.
3164 // The wait is not expected to be long as the read state indicates that the other
3165 // thread is actively working on the page.
3166 BackOff(backoff_count++);
3167 raw_state = moving_pages_status_[page_idx].load(std::memory_order_acquire);
3168 } else if (state == PageState::kProcessedAndMapped) {
3169 // Nothing to do.
3170 break;
3171 } else {
3172 // Acquire order to ensure we don't start writing to a page, which could
3173 // be shared, before the CAS is successful.
3174 if (state == PageState::kUnprocessed &&
3175 moving_pages_status_[page_idx].compare_exchange_strong(
3176 raw_state,
3177 static_cast<uint8_t>(PageState::kMutatorProcessing),
3178 std::memory_order_acquire)) {
3179 if (fault_page < black_dense_end_) {
3180 UpdateNonMovingPage(first_obj, fault_page, from_space_slide_diff_, moving_space_bitmap_);
3181 buf = fault_page + from_space_slide_diff_;
3182 } else {
3183 if (UNLIKELY(buf == nullptr)) {
3184 uint16_t idx = compaction_buffer_counter_.fetch_add(1, std::memory_order_relaxed);
3185 // The buffer-map is one page bigger as the first buffer is used by GC-thread.
3186 CHECK_LE(idx, kMutatorCompactionBufferCount);
3187 buf = compaction_buffers_map_.Begin() + idx * gPageSize;
3188 DCHECK(compaction_buffers_map_.HasAddress(buf));
3189 self->SetThreadLocalGcBuffer(buf);
3190 }
3191
3192 if (fault_page < post_compact_end_) {
3193 // The page has to be compacted.
3194 CompactPage(first_obj,
3195 pre_compact_offset_moving_space_[page_idx],
3196 buf,
3197 /*needs_memset_zero=*/true);
3198 } else {
3199 DCHECK_NE(first_obj, nullptr);
3200 DCHECK_GT(pre_compact_offset_moving_space_[page_idx], 0u);
3201 uint8_t* pre_compact_page = black_allocations_begin_ + (fault_page - post_compact_end_);
3202 uint32_t first_chunk_size = black_alloc_pages_first_chunk_size_[page_idx];
3203 mirror::Object* next_page_first_obj = nullptr;
3204 if (page_idx + 1 < moving_first_objs_count_ + black_page_count_) {
3205 next_page_first_obj = first_objs_moving_space_[page_idx + 1].AsMirrorPtr();
3206 }
3207 DCHECK(IsAlignedParam(pre_compact_page, gPageSize));
3208 SlideBlackPage(first_obj,
3209 next_page_first_obj,
3210 first_chunk_size,
3211 pre_compact_page,
3212 buf,
3213 /*needs_memset_zero=*/true);
3214 }
3215 }
3216 // Nobody else would simultaneously modify this page's state so an
3217 // atomic store is sufficient. Use 'release' order to guarantee that
3218 // loads/stores to the page are finished before this store. Since the
3219 // mutator used its own buffer for the processing, there is no reason to
3220 // put its index in the status of the page. Also, the mutator is going
3221 // to immediately map the page, so that info is not needed.
3222 moving_pages_status_[page_idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapping),
3223 std::memory_order_release);
3224 CopyIoctl(fault_page, buf, gPageSize, /*return_on_contention=*/false, tolerate_enoent);
3225 // Store is sufficient as no other thread modifies the status at this stage.
3226 moving_pages_status_[page_idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
3227 std::memory_order_release);
3228 break;
3229 }
3230 state = GetPageStateFromWord(raw_state);
3231 if (state == PageState::kProcessed) {
3232 size_t arr_len = moving_first_objs_count_ + black_page_count_;
3233 // The page is processed but not mapped. We should map it. The release
3234 // order used in MapMovingSpacePages will ensure that the increment to
3235 // moving_compaction_in_progress is done first.
3236 if (MapMovingSpacePages(page_idx,
3237 arr_len,
3238 /*from_fault=*/true,
3239 /*return_on_contention=*/false,
3240 tolerate_enoent) > 0) {
3241 break;
3242 }
3243 raw_state = moving_pages_status_[page_idx].load(std::memory_order_acquire);
3244 }
3245 }
3246 }
3247 }
3248
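// Maps already-processed linear-alloc pages from the shadow map into place, batching
// contiguous kProcessed pages into as few copy ioctls as possible. With 'single_ioctl'
// it serves a faulting mutator that only needs its one page mapped; with 'free_pages'
// (the GC-thread path) it also reclaims the shadow pages once everything is mapped.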
MapUpdatedLinearAllocPages(uint8_t * start_page,uint8_t * start_shadow_page,Atomic<PageState> * state,size_t length,bool free_pages,bool single_ioctl,bool tolerate_enoent)3249 bool MarkCompact::MapUpdatedLinearAllocPages(uint8_t* start_page,
3250 uint8_t* start_shadow_page,
3251 Atomic<PageState>* state,
3252 size_t length,
3253 bool free_pages,
3254 bool single_ioctl,
3255 bool tolerate_enoent) {
3256 DCHECK_ALIGNED_PARAM(length, gPageSize);
3257 Atomic<PageState>* madv_state = state;
3258 size_t madv_len = length;
3259 uint8_t* madv_start = start_shadow_page;
3260 bool check_state_for_madv = false;
3261 uint8_t* end_page = start_page + length;
3262 while (start_page < end_page) {
3263 size_t map_len = 0;
3264 // Find a contiguous range of pages that we can map in single ioctl.
3265 for (Atomic<PageState>* cur_state = state;
3266 map_len < length && cur_state->load(std::memory_order_acquire) == PageState::kProcessed;
3267 map_len += gPageSize, cur_state++) {
3268 // No body.
3269 }
3270
3271 if (map_len == 0) {
3272 if (single_ioctl) {
3273 return state->load(std::memory_order_relaxed) == PageState::kProcessedAndMapped;
3274 }
3275 // Skip all the pages that this thread can't map.
3276 while (length > 0) {
3277 PageState s = state->load(std::memory_order_relaxed);
3278 if (s == PageState::kProcessed) {
3279 break;
3280 }
3281 // If we find any page which is being processed or mapped (only possible by mutators)
3282 // then we need to re-check the page-state and, if needed, wait for the state to change
3283 // to 'mapped', before the shadow pages are reclaimed.
3284 check_state_for_madv |= s > PageState::kUnprocessed && s < PageState::kProcessedAndMapped;
3285 state++;
3286 length -= gPageSize;
3287 start_shadow_page += gPageSize;
3288 start_page += gPageSize;
3289 }
3290 } else {
3291 map_len = CopyIoctl(start_page,
3292 start_shadow_page,
3293 map_len,
3294 /*return_on_contention=*/false,
3295 tolerate_enoent);
3296 DCHECK_NE(map_len, 0u);
3297 // Declare that the pages are ready to be accessed. Store is sufficient
3298 // as any thread will be storing the same value.
3299 for (size_t l = 0; l < map_len; l += gPageSize, state++) {
3300 PageState s = state->load(std::memory_order_relaxed);
3301 DCHECK(s == PageState::kProcessed || s == PageState::kProcessedAndMapped) << "state:" << s;
3302 state->store(PageState::kProcessedAndMapped, std::memory_order_release);
3303 }
3304 if (single_ioctl) {
3305 break;
3306 }
3307 start_page += map_len;
3308 start_shadow_page += map_len;
3309 length -= map_len;
3310 // state is already updated above.
3311 }
3312 }
3313 if (free_pages) {
3314 if (check_state_for_madv) {
3315 // Wait until all the pages are mapped before releasing them. This is needed to be
3316 // checked only if some mutators were found to be concurrently mapping pages earlier.
3317 for (size_t l = 0; l < madv_len; l += gPageSize, madv_state++) {
3318 uint32_t backoff_count = 0;
3319 PageState s = madv_state->load(std::memory_order_relaxed);
3320 while (s > PageState::kUnprocessed && s < PageState::kProcessedAndMapped) {
3321 BackOff(backoff_count++);
3322 s = madv_state->load(std::memory_order_relaxed);
3323 }
3324 }
3325 }
3326 ZeroAndReleaseMemory(madv_start, madv_len);
3327 }
3328 return true;
3329 }
3330
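// Handles a userfault on a linear-alloc page: pages outside the arenas captured at the
// pause get a zero-page; otherwise the page's roots are updated in the shadow map and
// the result is mapped in, coordinating with the GC thread via the per-page state array.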
ConcurrentlyProcessLinearAllocPage(uint8_t * fault_page,bool tolerate_enoent)3331 void MarkCompact::ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool tolerate_enoent) {
3332 auto arena_iter = linear_alloc_arenas_.end();
3333 {
3334 TrackedArena temp_arena(fault_page);
3335 arena_iter = linear_alloc_arenas_.upper_bound(&temp_arena);
3336 arena_iter = arena_iter != linear_alloc_arenas_.begin() ? std::prev(arena_iter)
3337 : linear_alloc_arenas_.end();
3338 }
3339 // Unlike ProcessLinearAlloc(), we don't need to hold arena-pool's lock here
3340 // because a thread trying to access the page and as a result causing this
3341 // userfault confirms that nobody can delete the corresponding arena and
3342 // release its pages.
3343 // NOTE: Some memory ranges may get recycled several times during a
3344 // compaction cycle, thereby potentially causing userfaults on the same page
3345 // several times. That's not a problem as all of them (except for possibly the
3346 // first one) only require us to map a zero-page, which we do without updating
3347 // the 'state_arr'.
3348 if (arena_iter == linear_alloc_arenas_.end() ||
3349 arena_iter->first->IsWaitingForDeletion() ||
3350 arena_iter->second <= fault_page) {
3351 // Fault page isn't in any of the arenas that existed before we started
3352 // compaction. So map zeropage and return.
3353 ZeropageIoctl(fault_page, gPageSize, /*tolerate_eexist=*/true, tolerate_enoent);
3354 } else {
3355 // Find the linear-alloc space containing fault-page
3356 LinearAllocSpaceData* space_data = nullptr;
3357 for (auto& data : linear_alloc_spaces_data_) {
3358 if (data.begin_ <= fault_page && fault_page < data.end_) {
3359 space_data = &data;
3360 break;
3361 }
3362 }
3363 DCHECK_NE(space_data, nullptr);
3364 ptrdiff_t diff = space_data->shadow_.Begin() - space_data->begin_;
3365 size_t page_idx = DivideByPageSize(fault_page - space_data->begin_);
3366 Atomic<PageState>* state_arr =
3367 reinterpret_cast<Atomic<PageState>*>(space_data->page_status_map_.Begin());
3368 PageState state = state_arr[page_idx].load(std::memory_order_acquire);
3369 uint32_t backoff_count = 0;
3370 while (true) {
3371 switch (state) {
3372 case PageState::kUnprocessed: {
3373 // Acquire order to ensure we don't start writing to shadow map, which is
3374 // shared, before the CAS is successful.
3375 if (state_arr[page_idx].compare_exchange_strong(
3376 state, PageState::kProcessing, std::memory_order_acquire)) {
3377 LinearAllocPageUpdater updater(this);
3378 uint8_t* first_obj = arena_iter->first->GetFirstObject(fault_page);
3379 // null first_obj indicates that it's a page from arena for
3380 // intern-table/class-table. So first object isn't required.
3381 if (first_obj != nullptr) {
3382 updater.MultiObjectArena(fault_page + diff, first_obj + diff);
3383 } else {
3384 updater.SingleObjectArena(fault_page + diff, gPageSize);
3385 }
3386 if (updater.WasLastPageTouched()) {
3387 state_arr[page_idx].store(PageState::kProcessed, std::memory_order_release);
3388 state = PageState::kProcessed;
3389 continue;
3390 } else {
3391 // If the page wasn't touched, then it means it is empty and
3392 // is most likely not present on the shadow-side. Furthermore,
3393 // since the shadow is also userfaultfd registered doing copy
3394 // ioctl fails as the copy-from-user in the kernel will cause
3395 // userfault. Instead, just map a zeropage, which is not only
3396 // correct but also efficient as it avoids unnecessary memcpy
3397 // in the kernel.
3398 if (ZeropageIoctl(fault_page,
3399 gPageSize,
3400 /*tolerate_eexist=*/false,
3401 tolerate_enoent)) {
3402 state_arr[page_idx].store(PageState::kProcessedAndMapped,
3403 std::memory_order_release);
3404 }
3405 return;
3406 }
3407 }
3408 }
3409 continue;
3410 case PageState::kProcessed:
3411 // Map as many pages as possible in a single ioctl, without spending
3412 // time freeing pages.
3413 if (MapUpdatedLinearAllocPages(fault_page,
3414 fault_page + diff,
3415 state_arr + page_idx,
3416 space_data->end_ - fault_page,
3417 /*free_pages=*/false,
3418 /*single_ioctl=*/true,
3419 tolerate_enoent)) {
3420 return;
3421 }
3422 // fault_page was not mapped by this thread (some other thread claimed
3423 // it). Wait for it to be mapped before returning.
3424 FALLTHROUGH_INTENDED;
3425 case PageState::kProcessing:
3426 case PageState::kProcessingAndMapping:
3427 case PageState::kProcessedAndMapping:
3428 // Wait for the page to be mapped before returning.
3429 BackOff(backoff_count++);
3430 state = state_arr[page_idx].load(std::memory_order_acquire);
3431 continue;
3432 case PageState::kMutatorProcessing:
3433 LOG(FATAL) << "Unreachable";
3434 UNREACHABLE();
3435 case PageState::kProcessedAndMapped:
3436 // Somebody else took care of the page.
3437 return;
3438 }
3439 break;
3440 }
3441 }
3442 }
3443
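// Concurrent (post-pause) pass over all linear-alloc arenas captured during the pause:
// updates GC roots page by page in the shadow map and then maps the updated ranges back
// into place, coalescing contiguous arenas of the same space into a single mapping call.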
ProcessLinearAlloc()3444 void MarkCompact::ProcessLinearAlloc() {
3445 GcVisitedArenaPool* arena_pool =
3446 static_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool());
3447 DCHECK_EQ(thread_running_gc_, Thread::Current());
3448 uint8_t* unmapped_range_start = nullptr;
3449 uint8_t* unmapped_range_end = nullptr;
3450 // Pointer to the linear-alloc space containing the current arena in the loop
3451 // below. Also helps in ensuring that two arenas, which are contiguous in
3452 // address space but are from different linear-alloc spaces, are not coalesced
3453 // into one range for mapping purpose.
3454 LinearAllocSpaceData* space_data = nullptr;
3455 Atomic<PageState>* state_arr = nullptr;
3456 ptrdiff_t diff = 0;
3457
3458 auto map_pages = [&]() {
3459 DCHECK_NE(diff, 0);
3460 DCHECK_NE(space_data, nullptr);
3461 DCHECK_GE(unmapped_range_start, space_data->begin_);
3462 DCHECK_LT(unmapped_range_start, space_data->end_);
3463 DCHECK_GT(unmapped_range_end, space_data->begin_);
3464 DCHECK_LE(unmapped_range_end, space_data->end_);
3465 DCHECK_LT(unmapped_range_start, unmapped_range_end);
3466 DCHECK_ALIGNED_PARAM(unmapped_range_end - unmapped_range_start, gPageSize);
3467 size_t page_idx = DivideByPageSize(unmapped_range_start - space_data->begin_);
3468 MapUpdatedLinearAllocPages(unmapped_range_start,
3469 unmapped_range_start + diff,
3470 state_arr + page_idx,
3471 unmapped_range_end - unmapped_range_start,
3472 /*free_pages=*/true,
3473 /*single_ioctl=*/false,
3474 /*tolerate_enoent=*/false);
3475 };
3476 for (auto& pair : linear_alloc_arenas_) {
3477 const TrackedArena* arena = pair.first;
3478 size_t arena_size = arena->Size();
3479 uint8_t* arena_begin = arena->Begin();
3480 // linear_alloc_arenas_ is sorted on arena-begin. So we will get all arenas
3481 // in that order.
3482 DCHECK_LE(unmapped_range_end, arena_begin);
3483 DCHECK(space_data == nullptr || arena_begin > space_data->begin_)
3484 << "space-begin:" << static_cast<void*>(space_data->begin_)
3485 << " arena-begin:" << static_cast<void*>(arena_begin);
3486 if (space_data == nullptr || space_data->end_ <= arena_begin) {
3487 // Map the processed arenas as we are switching to another space.
3488 if (space_data != nullptr && unmapped_range_end != nullptr) {
3489 map_pages();
3490 unmapped_range_end = nullptr;
3491 }
3492 // Find the linear-alloc space containing the arena
3493 LinearAllocSpaceData* curr_space_data = space_data;
3494 for (auto& data : linear_alloc_spaces_data_) {
3495 if (data.begin_ <= arena_begin && arena_begin < data.end_) {
3496 // Since arenas are sorted, the next space should be higher in address
3497 // order than the current one.
3498 DCHECK(space_data == nullptr || data.begin_ >= space_data->end_);
3499 diff = data.shadow_.Begin() - data.begin_;
3500 state_arr = reinterpret_cast<Atomic<PageState>*>(data.page_status_map_.Begin());
3501 space_data = &data;
3502 break;
3503 }
3504 }
3505 CHECK_NE(space_data, curr_space_data)
3506 << "Couldn't find space for arena-begin:" << static_cast<void*>(arena_begin);
3507 }
3508 // Map the processed arenas if we found a hole within the current space.
3509 if (unmapped_range_end != nullptr && unmapped_range_end < arena_begin) {
3510 map_pages();
3511 unmapped_range_end = nullptr;
3512 }
3513 if (unmapped_range_end == nullptr) {
3514 unmapped_range_start = unmapped_range_end = arena_begin;
3515 }
3516 DCHECK_NE(unmapped_range_start, nullptr);
3517 // It's ok to include all arenas in the unmapped range. Since the
3518 // corresponding state bytes will be kUnprocessed, we will skip calling
3519 // ioctl and madvise on arenas which are waiting to be deleted.
3520 unmapped_range_end += arena_size;
3521 {
3522 // Acquire arena-pool's lock (in shared-mode) so that the arena being updated
3523 // does not get deleted at the same time. If this critical section is too
3524 // long and impacts mutator response time, then we can get rid of this lock by
3525 // holding onto the memory ranges of all arenas deleted since the compaction
3526 // pause until compaction finishes.
3527 ReaderMutexLock rmu(thread_running_gc_, arena_pool->GetLock());
3528 // If any arenas were freed since compaction pause then skip them from
3529 // visiting.
3530 if (arena->IsWaitingForDeletion()) {
3531 continue;
3532 }
3533 uint8_t* last_byte = pair.second;
3534 DCHECK_ALIGNED_PARAM(last_byte, gPageSize);
3535 auto visitor = [space_data, last_byte, diff, this, state_arr](
3536 uint8_t* page_begin,
3537 uint8_t* first_obj,
3538 size_t page_size) REQUIRES_SHARED(Locks::mutator_lock_) {
3539 // No need to process pages past last_byte as they already have updated
3540 // gc-roots, if any.
3541 if (page_begin >= last_byte) {
3542 return;
3543 }
3544 LinearAllocPageUpdater updater(this);
3545 size_t page_idx = DivideByPageSize(page_begin - space_data->begin_);
3546 DCHECK_LT(page_idx, space_data->page_status_map_.Size());
3547 PageState expected_state = PageState::kUnprocessed;
3548 // Acquire order to ensure that we don't start accessing the shadow page,
3549 // which is shared with other threads, prior to CAS. Also, for same
3550 // reason, we used 'release' order for changing the state to 'processed'.
3551 if (state_arr[page_idx].compare_exchange_strong(
3552 expected_state, PageState::kProcessing, std::memory_order_acquire)) {
3553 // null first_obj indicates that it's a page from arena for
3554 // intern-table/class-table. So first object isn't required.
3555 if (first_obj != nullptr) {
3556 updater.MultiObjectArena(page_begin + diff, first_obj + diff);
3557 } else {
3558 DCHECK_EQ(page_size, gPageSize);
3559 updater.SingleObjectArena(page_begin + diff, page_size);
3560 }
3561 expected_state = PageState::kProcessing;
3562 // Store is sufficient as no other thread could be modifying it. Use
3563 // release order to ensure that the writes to the shadow page are
3564 // committed to memory before the state is updated.
3565 if (updater.WasLastPageTouched()) {
3566 state_arr[page_idx].store(PageState::kProcessed, std::memory_order_release);
3567 } else {
3568 // See comment in ConcurrentlyProcessLinearAllocPage() with same situation.
3569 ZeropageIoctl(
3570 page_begin, gPageSize, /*tolerate_eexist=*/false, /*tolerate_enoent=*/false);
3571 // Ioctl will act as release fence.
3572 state_arr[page_idx].store(PageState::kProcessedAndMapped, std::memory_order_release);
3573 }
3574 }
3575 };
3576
3577 arena->VisitRoots(visitor);
3578 }
3579 }
3580 if (unmapped_range_end > unmapped_range_start) {
3581 // Map remaining pages.
3582 map_pages();
3583 }
3584 }
3585
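// Registers [addr, addr + size) with userfaultfd in MISSING mode, so that the first
// access to any not-yet-mapped page in the range traps (delivered as SIGBUS here) and
// is resolved by mapping the page explicitly. A minimal sketch of how such a fault is
// later resolved (illustrative names only; the real paths are CopyIoctl() and
// ZeropageIoctl(), driven from ConcurrentlyProcessMovingPage()):
//
//   struct uffdio_copy copy;
//   copy.dst = reinterpret_cast<uintptr_t>(fault_page);  // missing page that trapped
//   copy.src = reinterpret_cast<uintptr_t>(buf);         // prepared page contents
//   copy.len = gPageSize;
//   copy.mode = 0;
//   ioctl(uffd_, UFFDIO_COPY, &copy);  // atomically maps the page for all threads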
RegisterUffd(void * addr,size_t size)3586 void MarkCompact::RegisterUffd(void* addr, size_t size) {
3587 DCHECK(IsValidFd(uffd_));
3588 struct uffdio_register uffd_register;
3589 uffd_register.range.start = reinterpret_cast<uintptr_t>(addr);
3590 uffd_register.range.len = size;
3591 uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING;
3592 CHECK_EQ(ioctl(uffd_, UFFDIO_REGISTER, &uffd_register), 0)
3593 << "ioctl_userfaultfd: register failed: " << strerror(errno)
3594 << ". start:" << static_cast<void*>(addr) << " len:" << PrettySize(size);
3595 }
3596
3597 // TODO: at some point we may want to tolerate certain error conditions (like ENOMEM
3598 // when we unregister the unused portion of the moving-space). Implement support
3599 // for that.
UnregisterUffd(uint8_t * start,size_t len)3600 void MarkCompact::UnregisterUffd(uint8_t* start, size_t len) {
3601 DCHECK(IsValidFd(uffd_));
3602 struct uffdio_range range;
3603 range.start = reinterpret_cast<uintptr_t>(start);
3604 range.len = len;
3605 CHECK_EQ(ioctl(uffd_, UFFDIO_UNREGISTER, &range), 0)
3606 << "ioctl_userfaultfd: unregister failed: " << strerror(errno)
3607 << ". addr:" << static_cast<void*>(start) << " len:" << PrettySize(len);
3608 }
3609
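// Concurrent compaction driven by the GC thread: compacts the moving space, updates
// linear-alloc spaces, then sets the compaction-done bit in each of the two SIGBUS
// counters (before and after unregistering the userfaultfd ranges) and waits for
// in-flight handlers, before releasing the from-space memory.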
CompactionPhase()3610 void MarkCompact::CompactionPhase() {
3611 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
3612 {
3613 int32_t freed_bytes = black_objs_slide_diff_;
3614 bump_pointer_space_->RecordFree(freed_objects_, freed_bytes);
3615 RecordFree(ObjectBytePair(freed_objects_, freed_bytes));
3616 }
3617
3618 CompactMovingSpace<kCopyMode>(compaction_buffers_map_.Begin());
3619
3620 ProcessLinearAlloc();
3621
3622 auto wait_for_compaction_counter = [this](size_t idx) {
3623 SigbusCounterType count = sigbus_in_progress_count_[idx].fetch_or(
3624 kSigbusCounterCompactionDoneMask, std::memory_order_acq_rel);
3625 // Wait for SIGBUS handlers already in play.
3626 for (uint32_t i = 0; count > 0; i++) {
3627 BackOff(i);
3628 count = sigbus_in_progress_count_[idx].load(std::memory_order_acquire);
3629 count &= ~kSigbusCounterCompactionDoneMask;
3630 }
3631 };
3632 // Set compaction-done bit in the first counter to indicate that gc-thread
3633 // is done compacting and mutators should stop incrementing this counter.
3634 // Mutators should tolerate ENOENT after this. This helps avoid priority
3635 // inversion in case mutators need to map zero-pages after compaction is
3636 // finished but before gc-thread manages to unregister the spaces.
3637 wait_for_compaction_counter(0);
3638
3639 // Unregister moving-space
3640 size_t moving_space_size = bump_pointer_space_->Capacity();
3641 size_t used_size = (moving_first_objs_count_ + black_page_count_) * gPageSize;
3642 if (used_size > 0) {
3643 UnregisterUffd(bump_pointer_space_->Begin(), used_size);
3644 }
3645 // Unregister linear-alloc spaces
3646 for (auto& data : linear_alloc_spaces_data_) {
3647 DCHECK_EQ(data.end_ - data.begin_, static_cast<ssize_t>(data.shadow_.Size()));
3648 UnregisterUffd(data.begin_, data.shadow_.Size());
3649 }
3650
3651 // Set compaction-done bit in the second counter to indicate that gc-thread
3652 // is done unregistering the spaces and therefore mutators, if in SIGBUS,
3653 // should return without attempting to map the faulted page. When the mutator
3654 // will access the address again, it will succeed. Once this counter is 0,
3655 // the gc-thread can safely initialize/madvise the data structures.
3656 wait_for_compaction_counter(1);
3657
3658 // Release all of the memory taken by moving-space's from-map
3659 from_space_map_.MadviseDontNeedAndZero();
3660 // mprotect(PROT_NONE) all maps except to-space in debug-mode to catch any unexpected accesses.
3661 DCHECK_EQ(mprotect(from_space_begin_, moving_space_size, PROT_NONE), 0)
3662 << "mprotect(PROT_NONE) for from-space failed: " << strerror(errno);
3663
3664 // madvise the linear-alloc spaces' page-status arrays. Note that we don't need to
3665 // madvise the shadow-map as the pages from it were reclaimed in
3666 // ProcessLinearAlloc() after arenas were mapped.
3667 for (auto& data : linear_alloc_spaces_data_) {
3668 data.page_status_map_.MadviseDontNeedAndZero();
3669 }
3670 }
3671
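// Buffers up to kBufferSize newly marked roots per checkpointed thread and flushes them
// to the shared mark stack in one shot under mark_compact_->lock_, reducing contention
// when many threads mark their roots concurrently.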
3672 template <size_t kBufferSize>
3673 class MarkCompact::ThreadRootsVisitor : public RootVisitor {
3674 public:
ThreadRootsVisitor(MarkCompact * mark_compact,Thread * const self)3675 explicit ThreadRootsVisitor(MarkCompact* mark_compact, Thread* const self)
3676 : mark_compact_(mark_compact), self_(self) {}
3677
~ThreadRootsVisitor()3678 ~ThreadRootsVisitor() {
3679 Flush();
3680 }
3681
VisitRoots(mirror::Object *** roots,size_t count,const RootInfo & info)3682 void VisitRoots(mirror::Object*** roots,
3683 size_t count,
3684 [[maybe_unused]] const RootInfo& info) override
3685 REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
3686 for (size_t i = 0; i < count; i++) {
3687 mirror::Object* obj = *roots[i];
3688 if (mark_compact_->MarkObjectNonNullNoPush</*kParallel*/true>(obj)) {
3689 Push(obj);
3690 }
3691 }
3692 }
3693
VisitRoots(mirror::CompressedReference<mirror::Object> ** roots,size_t count,const RootInfo & info)3694 void VisitRoots(mirror::CompressedReference<mirror::Object>** roots,
3695 size_t count,
3696 [[maybe_unused]] const RootInfo& info) override
3697 REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
3698 for (size_t i = 0; i < count; i++) {
3699 mirror::Object* obj = roots[i]->AsMirrorPtr();
3700 if (mark_compact_->MarkObjectNonNullNoPush</*kParallel*/true>(obj)) {
3701 Push(obj);
3702 }
3703 }
3704 }
3705
3706 private:
Flush()3707 void Flush() REQUIRES_SHARED(Locks::mutator_lock_)
3708 REQUIRES(Locks::heap_bitmap_lock_) {
3709 StackReference<mirror::Object>* start;
3710 StackReference<mirror::Object>* end;
3711 {
3712 MutexLock mu(self_, mark_compact_->lock_);
3713 // Loop here because even after expanding once it may not be sufficient to
3714 // accommodate all references. It's almost impossible, but there is no harm
3715 // in implementing it this way.
3716 while (!mark_compact_->mark_stack_->BumpBack(idx_, &start, &end)) {
3717 mark_compact_->ExpandMarkStack();
3718 }
3719 }
3720 while (idx_ > 0) {
3721 *start++ = roots_[--idx_];
3722 }
3723 DCHECK_EQ(start, end);
3724 }
3725
Push(mirror::Object * obj)3726 void Push(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_)
3727 REQUIRES(Locks::heap_bitmap_lock_) {
3728 if (UNLIKELY(idx_ >= kBufferSize)) {
3729 Flush();
3730 }
3731 roots_[idx_++].Assign(obj);
3732 }
3733
3734 StackReference<mirror::Object> roots_[kBufferSize];
3735 size_t idx_ = 0;
3736 MarkCompact* const mark_compact_;
3737 Thread* const self_;
3738 };
3739
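// Checkpoint closure run on (or on behalf of) each thread to mark its roots and clear
// its per-thread compaction buffer, then pass the GC barrier.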
3740 class MarkCompact::CheckpointMarkThreadRoots : public Closure {
3741 public:
CheckpointMarkThreadRoots(MarkCompact * mark_compact)3742 explicit CheckpointMarkThreadRoots(MarkCompact* mark_compact) : mark_compact_(mark_compact) {}
3743
Run(Thread * thread)3744 void Run(Thread* thread) override NO_THREAD_SAFETY_ANALYSIS {
3745 ScopedTrace trace("Marking thread roots");
3746 // Note: self is not necessarily equal to thread since thread may be
3747 // suspended.
3748 Thread* const self = Thread::Current();
3749 CHECK(thread == self
3750 || thread->IsSuspended()
3751 || thread->GetState() == ThreadState::kWaitingPerformingGc)
3752 << thread->GetState() << " thread " << thread << " self " << self;
3753 {
3754 ThreadRootsVisitor</*kBufferSize*/ 20> visitor(mark_compact_, self);
3755 thread->VisitRoots(&visitor, kVisitRootFlagAllRoots);
3756 }
3757 // Clear page-buffer to prepare for compaction phase.
3758 thread->SetThreadLocalGcBuffer(nullptr);
3759
3760 // If thread is a running mutator, then act on behalf of the garbage
3761 // collector. See the code in ThreadList::RunCheckpoint.
3762 mark_compact_->GetBarrier().Pass(self);
3763 }
3764
3765 private:
3766 MarkCompact* const mark_compact_;
3767 };
3768
MarkRootsCheckpoint(Thread * self,Runtime * runtime)3769 void MarkCompact::MarkRootsCheckpoint(Thread* self, Runtime* runtime) {
3770 // We revoke TLABs later during the paused round of marking.
3771 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
3772 CheckpointMarkThreadRoots check_point(this);
3773 ThreadList* thread_list = runtime->GetThreadList();
3774 gc_barrier_.Init(self, 0);
3775 // Request that the checkpoint be run on all threads, returning a count of the threads that
3776 // must run through the barrier, including self.
3777 size_t barrier_count = thread_list->RunCheckpoint(&check_point);
3778 // Release locks then wait for all mutator threads to pass the barrier.
3779 // If there are no threads to wait for, which implies that all the checkpoint functions have
3780 // finished, then there is no need to release the locks.
3781 if (barrier_count == 0) {
3782 return;
3783 }
3784 Locks::heap_bitmap_lock_->ExclusiveUnlock(self);
3785 Locks::mutator_lock_->SharedUnlock(self);
3786 {
3787 ScopedThreadStateChange tsc(self, ThreadState::kWaitingForCheckPointsToRun);
3788 gc_barrier_.Increment(self, barrier_count);
3789 }
3790 Locks::mutator_lock_->SharedLock(self);
3791 Locks::heap_bitmap_lock_->ExclusiveLock(self);
3792 }
3793
void MarkCompact::MarkNonThreadRoots(Runtime* runtime) {
  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
  runtime->VisitNonThreadRoots(this);
}

void MarkCompact::MarkConcurrentRoots(VisitRootFlags flags, Runtime* runtime) {
  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
  runtime->VisitConcurrentRoots(this, flags);
}

void MarkCompact::RevokeAllThreadLocalBuffers() {
  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
  bump_pointer_space_->RevokeAllThreadLocalBuffers();
}

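// Scans the references of the given object (marking the referenced objects)
// without updating live-words information; used when scanning cards.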
class MarkCompact::ScanObjectVisitor {
 public:
  explicit ScanObjectVisitor(MarkCompact* const mark_compact) ALWAYS_INLINE
      : mark_compact_(mark_compact) {}

  void operator()(ObjPtr<mirror::Object> obj) const
      ALWAYS_INLINE
      REQUIRES(Locks::heap_bitmap_lock_)
      REQUIRES_SHARED(Locks::mutator_lock_) {
    mark_compact_->ScanObject</*kUpdateLiveWords*/ false>(obj.Ptr());
  }

 private:
  MarkCompact* const mark_compact_;
};

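// Visits references recorded in the mod-union tables of immune (zygote and
// image) spaces, marking the objects they point to. Falls back to scanning
// aged cards when a space has no mod-union table (app images).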
void MarkCompact::UpdateAndMarkModUnion() {
  accounting::CardTable* const card_table = heap_->GetCardTable();
  for (const auto& space : immune_spaces_.GetSpaces()) {
    const char* name = space->IsZygoteSpace()
        ? "UpdateAndMarkZygoteModUnionTable"
        : "UpdateAndMarkImageModUnionTable";
    DCHECK(space->IsZygoteSpace() || space->IsImageSpace()) << *space;
    TimingLogger::ScopedTiming t(name, GetTimings());
    accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
    if (table != nullptr) {
      // UpdateAndMarkReferences() doesn't visit Reference-type objects. But
      // that's fine because these objects are immutable enough (referent can
      // only be cleared) and hence the only referents they can have are intra-space.
      table->UpdateAndMarkReferences(this);
    } else {
      // No mod-union table, scan all dirty/aged cards in the corresponding
      // card-table. This can only occur for app images.
      card_table->Scan</*kClearCard*/ false>(space->GetMarkBitmap(),
                                             space->Begin(),
                                             space->End(),
                                             ScanObjectVisitor(this),
                                             gc::accounting::CardTable::kCardAged);
    }
  }
}

void MarkCompact::MarkReachableObjects() {
  UpdateAndMarkModUnion();
  // Recursively mark all the non-image bits set in the mark bitmap.
  ProcessMarkStack();
}

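// Scans objects on cards that are at least `minimum_age` old in every
// continuous space, marking the references they contain.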
void MarkCompact::ScanDirtyObjects(bool paused, uint8_t minimum_age) {
  accounting::CardTable* card_table = heap_->GetCardTable();
  for (const auto& space : heap_->GetContinuousSpaces()) {
    const char* name = nullptr;
    switch (space->GetGcRetentionPolicy()) {
      case space::kGcRetentionPolicyNeverCollect:
        name = paused ? "(Paused)ScanGrayImmuneSpaceObjects" : "ScanGrayImmuneSpaceObjects";
        break;
      case space::kGcRetentionPolicyFullCollect:
        name = paused ? "(Paused)ScanGrayZygoteSpaceObjects" : "ScanGrayZygoteSpaceObjects";
        break;
      case space::kGcRetentionPolicyAlwaysCollect:
        name = paused ? "(Paused)ScanGrayAllocSpaceObjects" : "ScanGrayAllocSpaceObjects";
        break;
    }
    TimingLogger::ScopedTiming t(name, GetTimings());
    card_table->Scan</*kClearCard*/ false>(
        space->GetMarkBitmap(), space->Begin(), space->End(), ScanObjectVisitor(this), minimum_age);
  }
}

void MarkCompact::RecursiveMarkDirtyObjects(bool paused, uint8_t minimum_age) {
  ScanDirtyObjects(paused, minimum_age);
  ProcessMarkStack();
}

void MarkCompact::MarkRoots(VisitRootFlags flags) {
  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
  Runtime* runtime = Runtime::Current();
  // Make sure that the checkpoint which collects the stack roots is the first
  // one capturing GC-roots, as it is supposed to record the address after
  // which everything allocated (during this marking phase) will be considered
  // 'marked'.
  MarkRootsCheckpoint(thread_running_gc_, runtime);
  MarkNonThreadRoots(runtime);
  MarkConcurrentRoots(flags, runtime);
}

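// Concurrently re-marks newly logged roots and objects on dirtied cards so
// that the final paused marking round has less work to do.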
void MarkCompact::PreCleanCards() {
  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
  CHECK(!Locks::mutator_lock_->IsExclusiveHeld(thread_running_gc_));
  // Age the card-table before the thread-stack scanning checkpoint in
  // MarkRoots() as it ensures that there are no in-progress write barriers
  // which started prior to aging the card-table.
  PrepareCardTableForMarking(/*clear_alloc_space_cards*/ false);
  MarkRoots(static_cast<VisitRootFlags>(kVisitRootFlagClearRootLog | kVisitRootFlagNewRoots));
  RecursiveMarkDirtyObjects(/*paused*/ false, accounting::CardTable::kCardDirty - 1);
}

// In a concurrent marking algorithm, if we are not using a write/read barrier,
// as is the case here, then we need a stop-the-world (STW) round at the end to
// mark objects which were written into while concurrent marking was in
// progress.
// In order to minimize the pause time, we could take one of two approaches:
// 1. Keep repeating concurrent marking of dirty cards until the time spent
//    goes below a threshold.
// 2. Do two rounds concurrently and then attempt a paused one. If we figure
//    that it's taking too long, then resume mutators and retry.
//
// Given the non-trivial fixed overhead of running a round (card table and root
// scan), it might be better to go with approach 2.
void MarkCompact::MarkingPhase() {
  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
  DCHECK_EQ(thread_running_gc_, Thread::Current());
  WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_);
  MaybeClampGcStructures();
  PrepareCardTableForMarking(/*clear_alloc_space_cards*/ true);
  MarkZygoteLargeObjects();
  MarkRoots(
      static_cast<VisitRootFlags>(kVisitRootFlagAllRoots | kVisitRootFlagStartLoggingNewRoots));
  MarkReachableObjects();
  // Pre-clean dirtied cards to reduce pauses.
  PreCleanCards();

  // Set up reference processing and forward soft references once before
  // enabling the slow path (in MarkingPause).
  ReferenceProcessor* rp = GetHeap()->GetReferenceProcessor();
  bool clear_soft_references = GetCurrentIteration()->GetClearSoftReferences();
  rp->Setup(thread_running_gc_, this, /*concurrent=*/ true, clear_soft_references);
  if (!clear_soft_references) {
    // Forward as many SoftReferences as possible before inhibiting reference access.
    rp->ForwardSoftReferences(GetTimings());
  }
}

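// Visitor for an object's reference fields and native GC roots: marks the
// referenced objects and delays processing of Reference referents.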
class MarkCompact::RefFieldsVisitor {
 public:
  ALWAYS_INLINE explicit RefFieldsVisitor(MarkCompact* const mark_compact)
      : mark_compact_(mark_compact) {}

  ALWAYS_INLINE void operator()(mirror::Object* obj,
                                MemberOffset offset,
                                [[maybe_unused]] bool is_static) const
      REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
    if (kCheckLocks) {
      Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
      Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
    }
    mark_compact_->MarkObject(obj->GetFieldObject<mirror::Object>(offset), obj, offset);
  }

  void operator()(ObjPtr<mirror::Class> klass, ObjPtr<mirror::Reference> ref) const ALWAYS_INLINE
      REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
    mark_compact_->DelayReferenceReferent(klass, ref);
  }

  void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const ALWAYS_INLINE
      REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
    if (!root->IsNull()) {
      VisitRoot(root);
    }
  }

  void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
      REQUIRES(Locks::heap_bitmap_lock_)
      REQUIRES_SHARED(Locks::mutator_lock_) {
    if (kCheckLocks) {
      Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
      Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
    }
    mark_compact_->MarkObject(root->AsMirrorPtr());
  }

 private:
  MarkCompact* const mark_compact_;
};

template <size_t kAlignment>
size_t MarkCompact::LiveWordsBitmap<kAlignment>::LiveBytesInBitmapWord(size_t chunk_idx) const {
  const size_t index = chunk_idx * kBitmapWordsPerVectorWord;
  size_t words = 0;
  for (uint32_t i = 0; i < kBitmapWordsPerVectorWord; i++) {
    words += POPCOUNT(Bitmap::Begin()[index + i]);
  }
  return words * kAlignment;
}

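// Records the live words for `obj` in the live-words bitmap and distributes
// its (aligned) size across the per-chunk counters in chunk_info_vec_, which
// the compaction phase uses when computing post-compaction offsets.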
void MarkCompact::UpdateLivenessInfo(mirror::Object* obj, size_t obj_size) {
  DCHECK(obj != nullptr);
  DCHECK_EQ(obj_size, obj->SizeOf<kDefaultVerifyFlags>());
  uintptr_t obj_begin = reinterpret_cast<uintptr_t>(obj);
  UpdateClassAfterObjectMap(obj);
  size_t size = RoundUp(obj_size, kAlignment);
  uintptr_t bit_index = live_words_bitmap_->SetLiveWords(obj_begin, size);
  size_t chunk_idx = (obj_begin - live_words_bitmap_->Begin()) / kOffsetChunkSize;
  // Compute the bit-index within the chunk-info vector word.
  bit_index %= kBitsPerVectorWord;
  size_t first_chunk_portion = std::min(size, (kBitsPerVectorWord - bit_index) * kAlignment);

  chunk_info_vec_[chunk_idx++] += first_chunk_portion;
  DCHECK_LE(first_chunk_portion, size);
  for (size -= first_chunk_portion; size > kOffsetChunkSize; size -= kOffsetChunkSize) {
    DCHECK_EQ(chunk_info_vec_[chunk_idx], 0u);
    chunk_info_vec_[chunk_idx++] = kOffsetChunkSize;
  }
  chunk_info_vec_[chunk_idx] += size;
  freed_objects_--;
}

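// Visits all reference fields of `obj`, marking the referenced objects. If
// kUpdateLiveWords is true and `obj` is in the moving space, its liveness
// information is recorded as well.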
template <bool kUpdateLiveWords>
void MarkCompact::ScanObject(mirror::Object* obj) {
  mirror::Class* klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
  // TODO(lokeshgidra): Remove the following condition once b/373609505 is fixed.
  if (UNLIKELY(klass == nullptr)) {
    // It has been seen in the ConcurrentCopying GC that, after a small wait,
    // reloading the class pointer can yield a valid class object. So as a
    // workaround, retry the load for a while and log what happened.
    for (size_t i = 0; i < 1000; i++) {
      // Wait for 1ms at a time. Don't wait for more than 1 second in total.
      usleep(1000);
      klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
      if (klass != nullptr) {
        std::ostringstream oss;
        klass->DumpClass(oss, mirror::Class::kDumpClassFullDetail);
        LOG(FATAL_WITHOUT_ABORT) << "klass pointer for obj: " << obj
                                 << " found to be null first. Reloading after " << i
                                 << " iterations of 1ms sleep fetched klass: " << oss.str();
        break;
      }
    }

    if (UNLIKELY(klass == nullptr)) {
      // It must be heap corruption.
      LOG(FATAL_WITHOUT_ABORT) << "klass pointer for obj: " << obj << " found to be null.";
    }
    heap_->GetVerification()->LogHeapCorruption(
        obj, mirror::Object::ClassOffset(), klass, /*fatal=*/true);
  }
  // The size of `obj` is used both here (to update `bytes_scanned_`) and in
  // `UpdateLivenessInfo`. As fetching this value can be expensive, do it once
  // here and pass that information to `UpdateLivenessInfo`.
  size_t obj_size = obj->SizeOf<kDefaultVerifyFlags>();
  bytes_scanned_ += obj_size;

  RefFieldsVisitor visitor(this);
  DCHECK(IsMarked(obj)) << "Scanning unmarked object " << obj << "\n" << heap_->DumpSpaces();
  if (kUpdateLiveWords && HasAddress(obj)) {
    UpdateLivenessInfo(obj, obj_size);
  }
  obj->VisitReferences(visitor, visitor);
}

// Scan anything that's on the mark stack.
void MarkCompact::ProcessMarkStack() {
  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
  // TODO: try prefetch like in CMS
  while (!mark_stack_->IsEmpty()) {
    mirror::Object* obj = mark_stack_->PopBack();
    DCHECK(obj != nullptr);
    ScanObject</*kUpdateLiveWords*/ true>(obj);
  }
}

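// Doubles the mark-stack capacity, preserving the existing entries.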
void MarkCompact::ExpandMarkStack() {
  const size_t new_size = mark_stack_->Capacity() * 2;
  std::vector<StackReference<mirror::Object>> temp(mark_stack_->Begin(),
                                                   mark_stack_->End());
  mark_stack_->Resize(new_size);
  for (auto& ref : temp) {
    mark_stack_->PushBack(ref.AsMirrorPtr());
  }
  DCHECK(!mark_stack_->IsFull());
}

inline void MarkCompact::PushOnMarkStack(mirror::Object* obj) {
  if (UNLIKELY(mark_stack_->IsFull())) {
    ExpandMarkStack();
  }
  mark_stack_->PushBack(obj);
}

inline void MarkCompact::MarkObjectNonNull(mirror::Object* obj,
                                           mirror::Object* holder,
                                           MemberOffset offset) {
  DCHECK(obj != nullptr);
  if (MarkObjectNonNullNoPush</*kParallel*/false>(obj, holder, offset)) {
    PushOnMarkStack(obj);
  }
}

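// Marks `obj` in the bitmap of whichever space it belongs to. Returns true if
// the object transitioned from unmarked to marked and should therefore be
// pushed on the mark stack; objects in immune spaces and the large-object
// space are never pushed.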
template <bool kParallel>
inline bool MarkCompact::MarkObjectNonNullNoPush(mirror::Object* obj,
                                                 mirror::Object* holder,
                                                 MemberOffset offset) {
  // We expect most of the references to be in bump-pointer space, so try that
  // first to keep the cost of this function minimal.
  if (LIKELY(HasAddress(obj))) {
    return kParallel ? !moving_space_bitmap_->AtomicTestAndSet(obj)
                     : !moving_space_bitmap_->Set(obj);
  } else if (non_moving_space_bitmap_->HasAddress(obj)) {
    return kParallel ? !non_moving_space_bitmap_->AtomicTestAndSet(obj)
                     : !non_moving_space_bitmap_->Set(obj);
  } else if (immune_spaces_.ContainsObject(obj)) {
    DCHECK(IsMarked(obj) != nullptr);
    return false;
  } else {
    // Must be a large-object space, otherwise it's a case of heap corruption.
    if (!IsAlignedParam(obj, space::LargeObjectSpace::ObjectAlignment())) {
      // Objects in large-object space are aligned to the large-object alignment.
      // So if we have an object which doesn't belong to any space and is not
      // aligned to the large-object alignment either, then it's memory corruption.
      // TODO: implement protect/unprotect in bump-pointer space.
      heap_->GetVerification()->LogHeapCorruption(holder, offset, obj, /*fatal*/ true);
    }
    DCHECK_NE(heap_->GetLargeObjectsSpace(), nullptr)
        << "ref=" << obj
        << " doesn't belong to any of the spaces and large object space doesn't exist";
    accounting::LargeObjectBitmap* los_bitmap = heap_->GetLargeObjectsSpace()->GetMarkBitmap();
    DCHECK(los_bitmap->HasAddress(obj));
    if (kParallel) {
      los_bitmap->AtomicTestAndSet(obj);
    } else {
      los_bitmap->Set(obj);
    }
    // We only have strings and primitive arrays in the large-object space, so
    // there is no reason to push onto the mark stack.
    DCHECK(obj->IsString() || (obj->IsArrayInstance() && !obj->IsObjectArray()));
    return false;
  }
}

inline void MarkCompact::MarkObject(mirror::Object* obj,
                                    mirror::Object* holder,
                                    MemberOffset offset) {
  if (obj != nullptr) {
    MarkObjectNonNull(obj, holder, offset);
  }
}

mirror::Object* MarkCompact::MarkObject(mirror::Object* obj) {
  MarkObject(obj, nullptr, MemberOffset(0));
  return obj;
}

void MarkCompact::MarkHeapReference(mirror::HeapReference<mirror::Object>* obj,
                                    [[maybe_unused]] bool do_atomic_update) {
  MarkObject(obj->AsMirrorPtr(), nullptr, MemberOffset(0));
}

void MarkCompact::VisitRoots(mirror::Object*** roots,
                             size_t count,
                             const RootInfo& info) {
  if (compacting_) {
    uint8_t* moving_space_begin = black_dense_end_;
    uint8_t* moving_space_end = moving_space_end_;
    for (size_t i = 0; i < count; ++i) {
      UpdateRoot(roots[i], moving_space_begin, moving_space_end, info);
    }
  } else {
    for (size_t i = 0; i < count; ++i) {
      MarkObjectNonNull(*roots[i]);
    }
  }
}

void MarkCompact::VisitRoots(mirror::CompressedReference<mirror::Object>** roots,
                             size_t count,
                             const RootInfo& info) {
  // TODO: do we need to check if the root is null or not?
  if (compacting_) {
    uint8_t* moving_space_begin = black_dense_end_;
    uint8_t* moving_space_end = moving_space_end_;
    for (size_t i = 0; i < count; ++i) {
      UpdateRoot(roots[i], moving_space_begin, moving_space_end, info);
    }
  } else {
    for (size_t i = 0; i < count; ++i) {
      MarkObjectNonNull(roots[i]->AsMirrorPtr());
    }
  }
}

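// Returns `obj` (or its post-compaction address while compacting) if it is
// marked, and nullptr otherwise.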
mirror::Object* MarkCompact::IsMarked(mirror::Object* obj) {
  if (HasAddress(obj)) {
    const bool is_black = reinterpret_cast<uint8_t*>(obj) >= black_allocations_begin_;
    if (compacting_) {
      if (is_black) {
        return PostCompactBlackObjAddr(obj);
      } else if (moving_space_bitmap_->Test(obj)) {
        if (reinterpret_cast<uint8_t*>(obj) < black_dense_end_) {
          return obj;
        } else {
          return PostCompactOldObjAddr(obj);
        }
      } else {
        return nullptr;
      }
    }
    return (is_black || moving_space_bitmap_->Test(obj)) ? obj : nullptr;
  } else if (non_moving_space_bitmap_->HasAddress(obj)) {
    if (non_moving_space_bitmap_->Test(obj)) {
      return obj;
    }
  } else if (immune_spaces_.ContainsObject(obj)) {
    return obj;
  } else {
    DCHECK(heap_->GetLargeObjectsSpace())
        << "ref=" << obj
        << " doesn't belong to any of the spaces and large object space doesn't exist";
    accounting::LargeObjectBitmap* los_bitmap = heap_->GetLargeObjectsSpace()->GetMarkBitmap();
    if (los_bitmap->HasAddress(obj)) {
      DCHECK(IsAlignedParam(obj, space::LargeObjectSpace::ObjectAlignment()));
      if (los_bitmap->Test(obj)) {
        return obj;
      }
    } else {
      // The given obj is not in any of the known spaces, so return null. This
      // could happen, for instance, in interpreter caches where a concurrent
      // update to the cache could result in obj being a non-reference. This is
      // tolerable because SweepInterpreterCaches only updates if the given
      // object has moved, which can't be the case for a non-reference.
      return nullptr;
    }
  }
  return marking_done_ && IsOnAllocStack(obj) ? obj : nullptr;
}

bool MarkCompact::IsNullOrMarkedHeapReference(mirror::HeapReference<mirror::Object>* obj,
                                              [[maybe_unused]] bool do_atomic_update) {
  mirror::Object* ref = obj->AsMirrorPtr();
  if (ref == nullptr) {
    return true;
  }
  return IsMarked(ref) != nullptr;
}

// Process the 'referent' field in a java.lang.ref.Reference. If the referent
// has not yet been marked, put it on the appropriate list in the heap for later
// processing.
void MarkCompact::DelayReferenceReferent(ObjPtr<mirror::Class> klass,
                                         ObjPtr<mirror::Reference> ref) {
  heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, ref, this);
}

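// Resets collector state and releases memory used for GC metadata (compaction
// buffers, info map, live-words bitmap, per-GC maps) at the end of the cycle.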
void MarkCompact::FinishPhase() {
  GetCurrentIteration()->SetScannedBytes(bytes_scanned_);
  bool is_zygote = Runtime::Current()->IsZygote();
  compacting_ = false;
  marking_done_ = false;

  ZeroAndReleaseMemory(compaction_buffers_map_.Begin(), compaction_buffers_map_.Size());
  info_map_.MadviseDontNeedAndZero();
  live_words_bitmap_->ClearBitmap();
  if (moving_space_begin_ == black_dense_end_) {
    moving_space_bitmap_->Clear();
  } else {
    DCHECK_LT(moving_space_begin_, black_dense_end_);
    DCHECK_LE(black_dense_end_, moving_space_end_);
    moving_space_bitmap_->ClearRange(reinterpret_cast<mirror::Object*>(black_dense_end_),
                                     reinterpret_cast<mirror::Object*>(moving_space_end_));
  }
  bump_pointer_space_->SetBlackDenseRegionSize(black_dense_end_ - moving_space_begin_);

  if (UNLIKELY(is_zygote && IsValidFd(uffd_))) {
    // This unregisters all ranges as a side-effect.
    close(uffd_);
    uffd_ = kFdUnused;
    uffd_initialized_ = false;
  }
  CHECK(mark_stack_->IsEmpty());  // Ensure that the mark stack is empty.
  mark_stack_->Reset();
  DCHECK_EQ(thread_running_gc_, Thread::Current());
  if (kIsDebugBuild) {
    MutexLock mu(thread_running_gc_, lock_);
    if (updated_roots_.get() != nullptr) {
      updated_roots_->clear();
    }
  }
  class_after_obj_map_.clear();
  linear_alloc_arenas_.clear();
  {
    ReaderMutexLock mu(thread_running_gc_, *Locks::mutator_lock_);
    WriterMutexLock mu2(thread_running_gc_, *Locks::heap_bitmap_lock_);
    heap_->ClearMarkedObjects();
  }
  GcVisitedArenaPool* arena_pool =
      static_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool());
  arena_pool->DeleteUnusedArenas();
}

}  // namespace collector
}  // namespace gc
}  // namespace art