/*
 * Copyright 2022 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "base/gc_visited_arena_pool.h"

#include <sys/mman.h>
#include <sys/types.h>
#include <unistd.h>

#include "base/arena_allocator-inl.h"
#include "base/memfd.h"
#include "base/utils.h"
#include "gc/collector/mark_compact-inl.h"

namespace art HIDDEN {

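// A TrackedArena covers one contiguous [start, start + size) range handed out
// by GcVisitedArenaPool. Multi-object arenas additionally keep a per-page
// array recording, for each page, the object from which the GC should start
// visiting that page (see SetFirstObject()).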
TrackedArena::TrackedArena(uint8_t* start, size_t size, bool pre_zygote_fork, bool single_obj_arena)
    : Arena(),
      first_obj_array_(nullptr),
      pre_zygote_fork_(pre_zygote_fork),
      waiting_for_deletion_(false) {
  static_assert(ArenaAllocator::kArenaAlignment <= kMinPageSize,
                "Arena should not need stronger alignment than kMinPageSize.");
  memory_ = start;
  size_ = size;
  if (single_obj_arena) {
    // We have only one object in this arena and it is expected to consume the
    // entire arena.
    bytes_allocated_ = size;
  } else {
    DCHECK_ALIGNED_PARAM(size, gPageSize);
    DCHECK_ALIGNED_PARAM(start, gPageSize);
    size_t arr_size = DivideByPageSize(size);
    first_obj_array_.reset(new uint8_t*[arr_size]);
    std::fill_n(first_obj_array_.get(), arr_size, nullptr);
  }
}

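// Zero and release the arena's pages back to the OS and clear the per-page
// first-object entries, resetting the arena's allocation state.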
void TrackedArena::Release() {
  if (bytes_allocated_ > 0) {
    ZeroAndReleaseMemory(Begin(), Size());
    if (first_obj_array_.get() != nullptr) {
      std::fill_n(first_obj_array_.get(), DivideByPageSize(Size()), nullptr);
    }
    bytes_allocated_ = 0;
  }
}

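// Record 'obj_begin' as the first object for every page covered by
// [obj_begin, obj_end), except the object's first page unless the object
// starts exactly at a page boundary.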
void TrackedArena::SetFirstObject(uint8_t* obj_begin, uint8_t* obj_end) {
  DCHECK(first_obj_array_.get() != nullptr);
  DCHECK_LE(static_cast<void*>(Begin()), static_cast<void*>(obj_end));
  DCHECK_LT(static_cast<void*>(obj_begin), static_cast<void*>(obj_end));
  GcVisitedArenaPool* arena_pool =
      static_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool());
  size_t idx = DivideByPageSize(static_cast<size_t>(obj_begin - Begin()));
  size_t last_byte_idx = DivideByPageSize(static_cast<size_t>(obj_end - 1 - Begin()));
  // Do the update below with arena-pool's lock in shared-mode to serialize with
  // the compaction-pause wherein we acquire it exclusively. This is to ensure
  // that last-byte read there doesn't change after reading it and before
  // userfaultfd registration.
  ReaderMutexLock rmu(Thread::Current(), arena_pool->GetLock());
  // If the addr is at the beginning of a page, then we set it for that page too.
  if (IsAlignedParam(obj_begin, gPageSize)) {
    first_obj_array_[idx] = obj_begin;
  }
  while (idx < last_byte_idx) {
    first_obj_array_[++idx] = obj_begin;
  }
}

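// Map a new anonymous region of at least 'min_size' bytes for the pool,
// register it with the userfaultfd GC when that GC is in use, and add the
// whole region as a single free chunk.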
uint8_t* GcVisitedArenaPool::AddMap(size_t min_size) {
  size_t size = std::max(min_size, kLinearAllocPoolSize);
#if defined(__LP64__)
  // This is true only when we are running a 64-bit dex2oat to compile a 32-bit image.
  if (low_4gb_) {
    size = std::max(min_size, kLow4GBLinearAllocPoolSize);
  }
#endif
  size_t alignment = gc::Heap::BestPageTableAlignment(size);
  DCHECK_GE(size, gc::Heap::GetPMDSize());
  std::string err_msg;
  maps_.emplace_back(MemMap::MapAnonymousAligned(
      name_, size, PROT_READ | PROT_WRITE, low_4gb_, alignment, &err_msg));
  MemMap& map = maps_.back();
  if (!map.IsValid()) {
    LOG(FATAL) << "Failed to allocate " << name_ << ": " << err_msg;
    UNREACHABLE();
  }

  if (gUseUserfaultfd) {
    // Create a shadow-map for the map being added for userfaultfd GC
    gc::collector::MarkCompact* mark_compact =
        Runtime::Current()->GetHeap()->MarkCompactCollector();
    DCHECK_NE(mark_compact, nullptr);
    mark_compact->AddLinearAllocSpaceData(map.Begin(), map.Size());
  }
  Chunk* chunk = new Chunk(map.Begin(), map.Size());
  best_fit_allocs_.insert(chunk);
  free_chunks_.insert(chunk);
  return map.Begin();
}

GcVisitedArenaPool::GcVisitedArenaPool(bool low_4gb, bool is_zygote, const char* name)
    : lock_("gc-visited arena-pool", kGenericBottomLock),
      bytes_allocated_(0),
      unused_arenas_(nullptr),
      name_(name),
      defer_arena_freeing_(false),
      low_4gb_(low_4gb),
      pre_zygote_fork_(is_zygote) {}

GcVisitedArenaPool::~GcVisitedArenaPool() {
  for (Chunk* chunk : free_chunks_) {
    delete chunk;
  }
  // Must not delete chunks from best_fit_allocs_ as they are shared with
  // free_chunks_.
}

size_t GcVisitedArenaPool::GetBytesAllocated() const {
  ReaderMutexLock rmu(Thread::Current(), lock_);
  return bytes_allocated_;
}

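// Map a dedicated anonymous region for a pre-zygote-fork arena. Unlike
// AddMap(), the region is not added to the free-chunk sets; it is unmapped
// when the corresponding arena is freed (see FreeArenaChain()).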
uint8_t* GcVisitedArenaPool::AddPreZygoteForkMap(size_t size) {
  DCHECK(pre_zygote_fork_);
  std::string pre_fork_name = "Pre-zygote-";
  pre_fork_name += name_;
  std::string err_msg;
  maps_.emplace_back(MemMap::MapAnonymous(
      pre_fork_name.c_str(), size, PROT_READ | PROT_WRITE, low_4gb_, &err_msg));
  MemMap& map = maps_.back();
  if (!map.IsValid()) {
    LOG(FATAL) << "Failed to allocate " << pre_fork_name << ": " << err_msg;
    UNREACHABLE();
  }
  return map.Begin();
}

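// Allocate an arena meant to hold exactly one object (class tables and intern
// tables use this path) and return its start address.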
uint8_t* GcVisitedArenaPool::AllocSingleObjArena(size_t size) {
  WriterMutexLock wmu(Thread::Current(), lock_);
  Arena* arena;
  DCHECK(gUseUserfaultfd);
  // To minimize private dirty, all class and intern table allocations are
  // done outside LinearAlloc range so they are untouched during GC.
  if (pre_zygote_fork_) {
    uint8_t* begin = static_cast<uint8_t*>(malloc(size));
    auto insert_result = allocated_arenas_.insert(
        new TrackedArena(begin, size, /*pre_zygote_fork=*/true, /*single_obj_arena=*/true));
    arena = *insert_result.first;
  } else {
    arena = AllocArena(size, /*single_obj_arena=*/true);
  }
  return arena->Begin();
}

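// Free an arena previously returned by AllocSingleObjArena(). 'addr' must be
// the address that AllocSingleObjArena() returned.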
void GcVisitedArenaPool::FreeSingleObjArena(uint8_t* addr) {
  Thread* self = Thread::Current();
  size_t size;
  bool zygote_arena;
  {
    TrackedArena temp_arena(addr);
    WriterMutexLock wmu(self, lock_);
    auto iter = allocated_arenas_.find(&temp_arena);
    DCHECK(iter != allocated_arenas_.end());
    TrackedArena* arena = *iter;
    size = arena->Size();
    zygote_arena = arena->IsPreZygoteForkArena();
    DCHECK_EQ(arena->Begin(), addr);
    DCHECK(arena->IsSingleObjectArena());
    allocated_arenas_.erase(iter);
    if (defer_arena_freeing_) {
      arena->SetupForDeferredDeletion(unused_arenas_);
      unused_arenas_ = arena;
    } else {
      delete arena;
    }
  }
  // Refer to the comment in FreeArenaChain() for why the pages are released
  // after deleting the arena.
  if (zygote_arena) {
    free(addr);
  } else {
    ZeroAndReleaseMemory(addr, size);
    WriterMutexLock wmu(self, lock_);
    FreeRangeLocked(addr, size);
  }
}

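// Allocate an arena of at least 'size' bytes (rounded up to a page multiple).
// Before the first zygote fork each arena gets its own private mapping;
// afterwards the best-fitting free chunk is used, and split if it is at least
// twice the requested size.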
Arena* GcVisitedArenaPool::AllocArena(size_t size, bool single_obj_arena) {
  // Return only page aligned sizes so that madvise can be leveraged.
  size = RoundUp(size, gPageSize);
  if (pre_zygote_fork_) {
    // The first fork out of zygote hasn't happened yet. Allocate arena in a
    // private-anonymous mapping to retain clean pages across fork.
    uint8_t* addr = AddPreZygoteForkMap(size);
    auto insert_result = allocated_arenas_.insert(
        new TrackedArena(addr, size, /*pre_zygote_fork=*/true, single_obj_arena));
    DCHECK(insert_result.second);
    return *insert_result.first;
  }

  Chunk temp_chunk(nullptr, size);
  auto best_fit_iter = best_fit_allocs_.lower_bound(&temp_chunk);
  if (UNLIKELY(best_fit_iter == best_fit_allocs_.end())) {
    AddMap(size);
    best_fit_iter = best_fit_allocs_.lower_bound(&temp_chunk);
    CHECK(best_fit_iter != best_fit_allocs_.end());
  }
  auto free_chunks_iter = free_chunks_.find(*best_fit_iter);
  DCHECK(free_chunks_iter != free_chunks_.end());
  Chunk* chunk = *best_fit_iter;
  DCHECK_EQ(chunk, *free_chunks_iter);
  // if the best-fit chunk < 2x the requested size, then give the whole chunk.
  if (chunk->size_ < 2 * size) {
    DCHECK_GE(chunk->size_, size);
    auto insert_result = allocated_arenas_.insert(new TrackedArena(chunk->addr_,
                                                                    chunk->size_,
                                                                    /*pre_zygote_fork=*/false,
                                                                    single_obj_arena));
    DCHECK(insert_result.second);
    free_chunks_.erase(free_chunks_iter);
    best_fit_allocs_.erase(best_fit_iter);
    delete chunk;
    return *insert_result.first;
  } else {
    auto insert_result = allocated_arenas_.insert(new TrackedArena(chunk->addr_,
                                                                    size,
                                                                    /*pre_zygote_fork=*/false,
                                                                    single_obj_arena));
    DCHECK(insert_result.second);
    // Compute next iterators for faster insert later.
    auto next_best_fit_iter = best_fit_iter;
    next_best_fit_iter++;
    auto next_free_chunks_iter = free_chunks_iter;
    next_free_chunks_iter++;
    auto best_fit_nh = best_fit_allocs_.extract(best_fit_iter);
    auto free_chunks_nh = free_chunks_.extract(free_chunks_iter);
    best_fit_nh.value()->addr_ += size;
    best_fit_nh.value()->size_ -= size;
    DCHECK_EQ(free_chunks_nh.value()->addr_, chunk->addr_);
    best_fit_allocs_.insert(next_best_fit_iter, std::move(best_fit_nh));
    free_chunks_.insert(next_free_chunks_iter, std::move(free_chunks_nh));
    return *insert_result.first;
  }
}

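// Return [range_begin, range_begin + range_size) to the free-chunk sets,
// coalescing it with adjacent free chunks whenever they are contiguous.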
void GcVisitedArenaPool::FreeRangeLocked(uint8_t* range_begin, size_t range_size) {
  Chunk temp_chunk(range_begin, range_size);
  bool merge_with_next = false;
  bool merge_with_prev = false;
  auto next_iter = free_chunks_.lower_bound(&temp_chunk);
  auto iter_for_extract = free_chunks_.end();
  // Can we merge with the previous chunk?
  if (next_iter != free_chunks_.begin()) {
    auto prev_iter = next_iter;
    prev_iter--;
    merge_with_prev = (*prev_iter)->addr_ + (*prev_iter)->size_ == range_begin;
    if (merge_with_prev) {
      range_begin = (*prev_iter)->addr_;
      range_size += (*prev_iter)->size_;
      // Hold on to the iterator for faster extract later
      iter_for_extract = prev_iter;
    }
  }
  // Can we merge with the next chunk?
  if (next_iter != free_chunks_.end()) {
    merge_with_next = range_begin + range_size == (*next_iter)->addr_;
    if (merge_with_next) {
      range_size += (*next_iter)->size_;
      if (merge_with_prev) {
        auto iter = next_iter;
        next_iter++;
        // Keep only one of the two chunks to be expanded.
        Chunk* chunk = *iter;
        size_t erase_res = best_fit_allocs_.erase(chunk);
        DCHECK_EQ(erase_res, 1u);
        free_chunks_.erase(iter);
        delete chunk;
      } else {
        iter_for_extract = next_iter;
        next_iter++;
      }
    }
  }

  // Extract-insert avoids 2/4 destroys and 2/2 creations
  // as compared to erase-insert, so use that when merging.
  if (merge_with_prev || merge_with_next) {
    auto free_chunks_nh = free_chunks_.extract(iter_for_extract);
    // 'iter_for_extract' is invalidated by the extract above, so look up the
    // corresponding node in best_fit_allocs_ via the extracted value instead.
    auto best_fit_allocs_nh = best_fit_allocs_.extract(free_chunks_nh.value());

    free_chunks_nh.value()->addr_ = range_begin;
    DCHECK_EQ(best_fit_allocs_nh.value()->addr_, range_begin);
    free_chunks_nh.value()->size_ = range_size;
    DCHECK_EQ(best_fit_allocs_nh.value()->size_, range_size);

    free_chunks_.insert(next_iter, std::move(free_chunks_nh));
    // Since the chunk's size has expanded, the hint won't be useful
    // for best-fit set.
    best_fit_allocs_.insert(std::move(best_fit_allocs_nh));
  } else {
    DCHECK(iter_for_extract == free_chunks_.end());
    Chunk* chunk = new Chunk(range_begin, range_size);
    free_chunks_.insert(next_iter, chunk);
    best_fit_allocs_.insert(chunk);
  }
}

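// Free a chain of arenas linked through Arena::Next(). The arenas are first
// removed from 'allocated_arenas_' under the lock; their pages are then
// madvised away and the ranges returned to the free-chunk sets, while
// pre-zygote-fork arenas have their private maps unmapped instead.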
void GcVisitedArenaPool::FreeArenaChain(Arena* first) {
  if (kRunningOnMemoryTool) {
    for (Arena* arena = first; arena != nullptr; arena = arena->Next()) {
      MEMORY_TOOL_MAKE_UNDEFINED(arena->Begin(), arena->GetBytesAllocated());
    }
  }

  // TODO: Handle the case when arena_allocator::kArenaAllocatorPreciseTracking
  // is true. See MemMapArenaPool::FreeArenaChain() for example.
  CHECK(!arena_allocator::kArenaAllocatorPreciseTracking);
  Thread* self = Thread::Current();
  // vector of arena ranges to be freed and whether they are pre-zygote-fork.
  std::vector<std::tuple<uint8_t*, size_t, bool>> free_ranges;

  {
    WriterMutexLock wmu(self, lock_);
    while (first != nullptr) {
      TrackedArena* temp = down_cast<TrackedArena*>(first);
      DCHECK(!temp->IsSingleObjectArena());
      first = first->Next();
      free_ranges.emplace_back(temp->Begin(), temp->Size(), temp->IsPreZygoteForkArena());
      // In other implementations of ArenaPool this is calculated when asked for,
      // thanks to the list of free arenas that is kept around. But in this case,
      // we release the freed arena back to the pool and therefore need to
      // calculate here.
      bytes_allocated_ += temp->GetBytesAllocated();
      auto iter = allocated_arenas_.find(temp);
      DCHECK(iter != allocated_arenas_.end());
      allocated_arenas_.erase(iter);
      if (defer_arena_freeing_) {
        temp->SetupForDeferredDeletion(unused_arenas_);
        unused_arenas_ = temp;
      } else {
        delete temp;
      }
    }
  }

  // madvise of arenas must be done after the above loop which serializes with
  // MarkCompact::ProcessLinearAlloc() so that if it finds an arena to be not
  // 'waiting-for-deletion' then it finishes the arena's processing before
  // clearing here. Otherwise, we could have a situation wherein arena-pool
  // assumes the memory range of the arena(s) to be zero'ed (by madvise),
  // whereas GC maps stale arena pages.
  for (auto& iter : free_ranges) {
    // No need to madvise pre-zygote-fork arenas as they will be munmapped below.
    if (!std::get<2>(iter)) {
      ZeroAndReleaseMemory(std::get<0>(iter), std::get<1>(iter));
    }
  }

  WriterMutexLock wmu(self, lock_);
  for (auto& iter : free_ranges) {
    if (UNLIKELY(std::get<2>(iter))) {
      bool found = false;
      for (auto map_iter = maps_.begin(); map_iter != maps_.end(); map_iter++) {
        if (map_iter->Begin() == std::get<0>(iter)) {
          // erase will destruct the MemMap and thereby munmap. But this happens
          // very rarely so it's ok to do it with lock acquired.
          maps_.erase(map_iter);
          found = true;
          break;
        }
      }
      CHECK(found);
    } else {
      FreeRangeLocked(std::get<0>(iter), std::get<1>(iter));
    }
  }
}

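// Delete the arenas queued on 'unused_arenas_' while deferred freeing was in
// effect, and switch back to immediate deletion.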
void GcVisitedArenaPool::DeleteUnusedArenas() {
  TrackedArena* arena;
  {
    WriterMutexLock wmu(Thread::Current(), lock_);
    defer_arena_freeing_ = false;
    arena = unused_arenas_;
    unused_arenas_ = nullptr;
  }
  while (arena != nullptr) {
    TrackedArena* temp = down_cast<TrackedArena*>(arena->Next());
    delete arena;
    arena = temp;
  }
}

}  // namespace art