1 // Copyright 2013 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef PARTITION_ALLOC_PAGE_ALLOCATOR_H_
6 #define PARTITION_ALLOC_PAGE_ALLOCATOR_H_
7
8 #include <cstddef>
9 #include <cstdint>
10
11 #include "build/build_config.h"
12 #include "partition_alloc/page_allocator_constants.h"
13 #include "partition_alloc/partition_alloc_base/compiler_specific.h"
14 #include "partition_alloc/partition_alloc_base/component_export.h"
15 #include "partition_alloc/partition_alloc_buildflags.h"
16 #include "partition_alloc/thread_isolation/thread_isolation.h"
17
18 namespace partition_alloc {
19
20 struct PageAccessibilityConfiguration {
21 enum Permissions {
22 kInaccessible,
23 // This flag is valid only with AllocPages(), where in creates kInaccessible
24 // pages that may later be re-mapped as executable, on platforms which
25 // distinguish never-executable and maybe-executable pages.
26 kInaccessibleWillJitLater,
27 kRead,
28 kReadWrite,
29 // This flag is mapped to kReadWrite on systems that
30 // don't support MTE.
31 kReadWriteTagged,
32 // This flag is mapped to kReadExecute on systems
33 // that don't support Arm's BTI.
34 kReadExecuteProtected,
35 kReadExecute,
36 // This flag is mapped to `kReadWriteExecute` on systems that do not support
37 // Arm's BTI.
38 kReadWriteExecuteProtected,
39 // This flag is deprecated and will go away soon.
40 // TODO(bbudge) Remove this as soon as V8 doesn't need RWX pages.
41 kReadWriteExecute,
42 };
43
44 #if BUILDFLAG(ENABLE_THREAD_ISOLATION)
PageAccessibilityConfigurationPageAccessibilityConfiguration45 constexpr explicit PageAccessibilityConfiguration(Permissions permissions)
46 : permissions(permissions) {}
PageAccessibilityConfigurationPageAccessibilityConfiguration47 constexpr PageAccessibilityConfiguration(
48 Permissions permissions,
49 ThreadIsolationOption thread_isolation)
50 : permissions(permissions), thread_isolation(thread_isolation) {}
51 #else
PageAccessibilityConfigurationPageAccessibilityConfiguration52 constexpr explicit PageAccessibilityConfiguration(Permissions permissions)
53 : permissions(permissions) {}
54 #endif // BUILDFLAG(ENABLE_THREAD_ISOLATION)
55
56 Permissions permissions;
57 #if BUILDFLAG(ENABLE_THREAD_ISOLATION)
58 // Tag the page with a Memory Protection Key. Use 0 for none.
59 ThreadIsolationOption thread_isolation;
60 #endif // BUILDFLAG(ENABLE_THREAD_ISOLATION)
61 };
62
// Selects how the De/RecommitSystemPages APIs treat page permissions.
enum class PageAccessibilityDisposition {
  // Permissions are always updated: Decommit switches the pages to
  // PageAccessibilityConfiguration::kInaccessible, and Recommit switches them
  // to whatever was requested (anything other than
  // PageAccessibilityConfiguration::kInaccessible).
  kRequireUpdate,
  // Permissions are left untouched where the platform supports doing so
  // (POSIX & Fuchsia only).
  kAllowKeepForPerf,
};
74
// Some platforms (including macOS and some Linux-based ones) support tagged
// memory regions, to help in debugging. On Android, these tags are used to name
// anonymous mappings.
//
// kChromium is the default value, used to distinguish general
// Chromium-originated allocations from other ones (e.g. from platform
// libraries).
enum class PageTag {
  kSimulation = 251,      // Memory simulator tool.
  kBlinkGC = 252,         // Blink GC pages.
  kPartitionAlloc = 253,  // PartitionAlloc, no matter the partition.
  kChromium = 254,        // Chromium page.
  kV8 = 255,              // V8 heap pages.

  kFirst = kSimulation,  // Minimum tag value.
  kLast = kV8            // Maximum tag value.
};

// Every tag must lie inside the range macOS reserves for applications, so the
// lower bound is checked against the smallest tag and the upper bound against
// the largest one. See
// https://github.com/apple-oss-distributions/xnu/blob/5c2921b07a2480ab43ec66f5b9e41cb872bc554f/osfmk/mach/vm_statistics.h#L687
static_assert(static_cast<int>(PageTag::kFirst) >= 240,
              "The first application-reserved tag on macOS is 240, see "
              "vm_statistics.h in XNU.");
static_assert(
    static_cast<int>(PageTag::kLast) < 256,
    "Tags are only 1 byte long on macOS, see vm_statistics.h in XNU.");
101
102 PA_COMPONENT_EXPORT(PARTITION_ALLOC)
103 uintptr_t NextAlignedWithOffset(uintptr_t ptr,
104 uintptr_t alignment,
105 uintptr_t requested_offset);
106
107 // Allocates one or more pages.
108 //
109 // The requested |address| is just a hint; the actual address returned may
110 // differ. The returned address will be aligned to |align_offset| modulo |align|
111 // bytes.
112 //
113 // |length|, |align| and |align_offset| are in bytes, and must be a multiple of
114 // |PageAllocationGranularity()|. |length| and |align| must be non-zero.
115 // |align_offset| must be less than |align|. |align| must be a power of two.
116 //
117 // If |address| is 0/nullptr, then a suitable and randomized address will be
118 // chosen automatically.
119 //
120 // |accessibility| controls the permission of the allocated pages.
121 // PageAccessibilityConfiguration::kInaccessible means uncommitted.
122 //
123 // |page_tag| is used on some platforms to identify the source of the
124 // allocation.
125 //
126 // |file_descriptor_for_shared_alloc| is only used in mapping the shadow
127 // pools to the same physical address as the real one in
128 // PartitionAddressSpace::Init(). It should be ignored in other cases.
129 //
130 // This call will return 0/nullptr if the allocation cannot be satisfied.
131 PA_COMPONENT_EXPORT(PARTITION_ALLOC)
132 uintptr_t AllocPages(size_t length,
133 size_t align,
134 PageAccessibilityConfiguration accessibility,
135 PageTag page_tag = PageTag::kChromium,
136 int file_descriptor_for_shared_alloc = -1);
137 PA_COMPONENT_EXPORT(PARTITION_ALLOC)
138 uintptr_t AllocPages(uintptr_t address,
139 size_t length,
140 size_t align,
141 PageAccessibilityConfiguration accessibility,
142 PageTag page_tag = PageTag::kChromium);
143 PA_COMPONENT_EXPORT(PARTITION_ALLOC)
144 void* AllocPages(void* address,
145 size_t length,
146 size_t align,
147 PageAccessibilityConfiguration accessibility,
148 PageTag page_tag = PageTag::kChromium);
149 PA_COMPONENT_EXPORT(PARTITION_ALLOC)
150 uintptr_t AllocPagesWithAlignOffset(
151 uintptr_t address,
152 size_t length,
153 size_t align,
154 size_t align_offset,
155 PageAccessibilityConfiguration page_accessibility,
156 PageTag page_tag = PageTag::kChromium,
157 int file_descriptor_for_shared_alloc = -1);
158
159 // Frees one or more pages starting at |address| and continuing for |length|
160 // bytes.
161 //
162 // |address| and |length| must match a previous call to |AllocPages|. Therefore,
163 // |address| must be aligned to |PageAllocationGranularity()| bytes, and
164 // |length| must be a multiple of |PageAllocationGranularity()|.
165 PA_COMPONENT_EXPORT(PARTITION_ALLOC)
166 void FreePages(uintptr_t address, size_t length);
167 PA_COMPONENT_EXPORT(PARTITION_ALLOC)
168 void FreePages(void* address, size_t length);
169
170 // Marks one or more system pages, starting at |address| with the given
171 // |page_accessibility|. |length| must be a multiple of |SystemPageSize()|
172 // bytes.
173 //
174 // Returns true if the permission change succeeded. In most cases you must
175 // |CHECK| the result.
176 [[nodiscard]] PA_COMPONENT_EXPORT(PARTITION_ALLOC) bool TrySetSystemPagesAccess(
177 uintptr_t address,
178 size_t length,
179 PageAccessibilityConfiguration page_accessibility);
180 [[nodiscard]] PA_COMPONENT_EXPORT(PARTITION_ALLOC) bool TrySetSystemPagesAccess(
181 void* address,
182 size_t length,
183 PageAccessibilityConfiguration page_accessibility);
184
185 // Marks one or more system pages, starting at |address| with the given
186 // |page_accessibility|. |length| must be a multiple of |SystemPageSize()|
187 // bytes.
188 //
189 // Performs a CHECK that the operation succeeds.
190 PA_COMPONENT_EXPORT(PARTITION_ALLOC)
191 void SetSystemPagesAccess(uintptr_t address,
192 size_t length,
193 PageAccessibilityConfiguration page_accessibility);
194 PA_COMPONENT_EXPORT(PARTITION_ALLOC)
195 void SetSystemPagesAccess(void* address,
196 size_t length,
197 PageAccessibilityConfiguration page_accessibility);
198
199 // Decommits one or more system pages starting at |address| and continuing for
200 // |length| bytes. |address| and |length| must be aligned to a system page
201 // boundary.
202 //
203 // This API will crash if the operation cannot be performed!
204 //
205 // If disposition is PageAccessibilityDisposition::kRequireUpdate (recommended),
206 // the decommitted pages will be made inaccessible before the call returns.
207 // While it is always a programming error to access decommitted pages without
208 // first recommitting them, callers may use
209 // PageAccessibilityDisposition::kAllowKeepForPerf to allow the implementation
210 // to skip changing permissions (use with care), for performance reasons (see
211 // crrev.com/c/2567282 and crrev.com/c/2563038 for perf regressions encountered
212 // in the past). Implementations may choose to always modify permissions, hence
213 // accessing those pages may or may not trigger a fault.
214 //
215 // Decommitting means that physical resources (RAM or swap/pagefile) backing the
216 // allocated virtual address range may be released back to the system, but the
217 // address space is still allocated to the process (possibly using up page table
218 // entries or other accounting resources). There is no guarantee that the pages
219 // are zeroed, unless |DecommittedMemoryIsAlwaysZeroed()| is true.
220 //
221 // This operation may not be atomic on some platforms.
222 //
223 // Note: "Committed memory" is a Windows Memory Subsystem concept that ensures
224 // processes will not fault when touching a committed memory region. There is
225 // no analogue in the POSIX & Fuchsia memory API where virtual memory pages are
226 // best-effort allocated resources on the first touch. If
227 // PageAccessibilityDisposition::kRequireUpdate disposition is used, this API
228 // behaves in a platform-agnostic way by simulating the Windows "decommit" state
229 // by both discarding the region (allowing the OS to avoid swap operations)
230 // *and* changing the page protections so accesses fault.
231 PA_COMPONENT_EXPORT(PARTITION_ALLOC)
232 void DecommitSystemPages(
233 uintptr_t address,
234 size_t length,
235 PageAccessibilityDisposition accessibility_disposition);
236 PA_COMPONENT_EXPORT(PARTITION_ALLOC)
237 void DecommitSystemPages(
238 void* address,
239 size_t length,
240 PageAccessibilityDisposition accessibility_disposition);
241
242 // Decommits one or more system pages starting at |address| and continuing for
243 // |length| bytes. |address| and |length| must be aligned to a system page
244 // boundary.
245 //
246 // In contrast to |DecommitSystemPages|, this API guarantees that the pages are
247 // zeroed and will always mark the region as inaccessible (the equivalent of
248 // setting them to PageAccessibilityConfiguration::kInaccessible).
249 PA_COMPONENT_EXPORT(PARTITION_ALLOC)
250 bool DecommitAndZeroSystemPages(uintptr_t address,
251 size_t length,
252 PageTag page_tag = PageTag::kChromium);
253 PA_COMPONENT_EXPORT(PARTITION_ALLOC)
254 bool DecommitAndZeroSystemPages(void* address,
255 size_t length,
256 PageTag page_tag = PageTag::kChromium);
257
258 // Whether decommitted memory is guaranteed to be zeroed when it is
259 // recommitted. Do not assume that this will not change over time.
PA_COMPONENT_EXPORT(PARTITION_ALLOC)260 constexpr PA_COMPONENT_EXPORT(
261 PARTITION_ALLOC) bool DecommittedMemoryIsAlwaysZeroed() {
262 #if BUILDFLAG(IS_APPLE)
263 return false;
264 #else
265 return true;
266 #endif
267 }
268
269 // (Re)Commits one or more system pages, starting at |address| and continuing
270 // for |length| bytes with the given |page_accessibility| (must not be
271 // PageAccessibilityConfiguration::kInaccessible). |address| and |length|
272 // must be aligned to a system page boundary.
273 //
274 // This API will crash if the operation cannot be performed!
275 //
276 // If disposition is PageAccessibilityConfiguration::kRequireUpdate, the calls
277 // updates the pages to |page_accessibility|. This can be used regardless of
278 // what disposition was used to decommit the pages.
279 // PageAccessibilityConfiguration::kAllowKeepForPerf allows the implementation
280 // to leave the page permissions, if that improves performance. This option can
281 // only be used if the pages were previously accessible and decommitted with
282 // that same option.
283 //
284 // The memory will be zeroed when it is committed for the first time. However,
285 // there is no such guarantee when memory is recommitted, unless
286 // |DecommittedMemoryIsAlwaysZeroed()| is true.
287 //
288 // This operation may not be atomic on some platforms.
289 PA_COMPONENT_EXPORT(PARTITION_ALLOC)
290 void RecommitSystemPages(
291 uintptr_t address,
292 size_t length,
293 PageAccessibilityConfiguration page_accessibility,
294 PageAccessibilityDisposition accessibility_disposition);
295
296 // Like RecommitSystemPages(), but returns false instead of crashing.
297 [[nodiscard]] PA_COMPONENT_EXPORT(PARTITION_ALLOC) bool TryRecommitSystemPages(
298 uintptr_t address,
299 size_t length,
300 PageAccessibilityConfiguration page_accessibility,
301 PageAccessibilityDisposition accessibility_disposition);
302
303 // Discard one or more system pages starting at |address| and continuing for
304 // |length| bytes. |length| must be a multiple of |SystemPageSize()|.
305 //
306 // Discarding is a hint to the system that the page is no longer required. The
307 // hint may:
308 // - Do nothing.
309 // - Discard the page immediately, freeing up physical pages.
310 // - Discard the page at some time in the future in response to memory
311 // pressure.
312 //
313 // Only committed pages should be discarded. Discarding a page does not decommit
314 // it, and it is valid to discard an already-discarded page. A read or write to
315 // a discarded page will not fault.
316 //
317 // Reading from a discarded page may return the original page content, or a page
318 // full of zeroes.
319 //
320 // Writing to a discarded page is the only guaranteed way to tell the system
321 // that the page is required again. Once written to, the content of the page is
322 // guaranteed stable once more. After being written to, the page content may be
323 // based on the original page content, or a page of zeroes.
324 PA_COMPONENT_EXPORT(PARTITION_ALLOC)
325 void DiscardSystemPages(uintptr_t address, size_t length);
326 PA_COMPONENT_EXPORT(PARTITION_ALLOC)
327 void DiscardSystemPages(void* address, size_t length);
328
329 // Rounds up |address| to the next multiple of |SystemPageSize()|. Returns
330 // 0 for an |address| of 0.
331 PA_ALWAYS_INLINE PAGE_ALLOCATOR_CONSTANTS_DECLARE_CONSTEXPR uintptr_t
RoundUpToSystemPage(uintptr_t address)332 RoundUpToSystemPage(uintptr_t address) {
333 return (address + internal::SystemPageOffsetMask()) &
334 internal::SystemPageBaseMask();
335 }
336
337 // Rounds down |address| to the previous multiple of |SystemPageSize()|. Returns
338 // 0 for an |address| of 0.
339 PA_ALWAYS_INLINE PAGE_ALLOCATOR_CONSTANTS_DECLARE_CONSTEXPR uintptr_t
RoundDownToSystemPage(uintptr_t address)340 RoundDownToSystemPage(uintptr_t address) {
341 return address & internal::SystemPageBaseMask();
342 }
343
344 // Rounds up |address| to the next multiple of |PageAllocationGranularity()|.
345 // Returns 0 for an |address| of 0.
346 PA_ALWAYS_INLINE PAGE_ALLOCATOR_CONSTANTS_DECLARE_CONSTEXPR uintptr_t
RoundUpToPageAllocationGranularity(uintptr_t address)347 RoundUpToPageAllocationGranularity(uintptr_t address) {
348 return (address + internal::PageAllocationGranularityOffsetMask()) &
349 internal::PageAllocationGranularityBaseMask();
350 }
351
352 // Rounds down |address| to the previous multiple of
353 // |PageAllocationGranularity()|. Returns 0 for an |address| of 0.
354 PA_ALWAYS_INLINE PAGE_ALLOCATOR_CONSTANTS_DECLARE_CONSTEXPR uintptr_t
RoundDownToPageAllocationGranularity(uintptr_t address)355 RoundDownToPageAllocationGranularity(uintptr_t address) {
356 return address & internal::PageAllocationGranularityBaseMask();
357 }
358
359 // Reserves (at least) |size| bytes of address space, aligned to
360 // |PageAllocationGranularity()|. This can be called early on to make it more
361 // likely that large allocations will succeed. Returns true if the reservation
362 // succeeded, false if the reservation failed or a reservation was already made.
363 PA_COMPONENT_EXPORT(PARTITION_ALLOC) bool ReserveAddressSpace(size_t size);
364
365 // Releases any reserved address space. |AllocPages| calls this automatically on
366 // an allocation failure. External allocators may also call this on failure.
367 //
368 // Returns true when an existing reservation was released.
369 PA_COMPONENT_EXPORT(PARTITION_ALLOC) bool ReleaseReservation();
370
371 // Returns true if there is currently an address space reservation.
372 PA_COMPONENT_EXPORT(PARTITION_ALLOC) bool HasReservationForTesting();
373
374 // Returns |errno| (POSIX) or the result of |GetLastError| (Windows) when |mmap|
375 // (POSIX) or |VirtualAlloc| (Windows) fails.
376 PA_COMPONENT_EXPORT(PARTITION_ALLOC) uint32_t GetAllocPageErrorCode();
377
378 // Returns the total amount of mapped pages from all clients of
379 // PageAllocator. These pages may or may not be committed. This is mostly useful
380 // to assess address space pressure.
381 PA_COMPONENT_EXPORT(PARTITION_ALLOC) size_t GetTotalMappedSize();
382
383 #if BUILDFLAG(IS_WIN)
384 // Sets whether to retry the allocation of pages when a commit failure
385 // happens. This doesn't cover cases where the system is out of address space,
386 // or reaches another limit.
387 PA_COMPONENT_EXPORT(PARTITION_ALLOC)
388 void SetRetryOnCommitFailure(bool retry_on_commit_failure);
389 bool GetRetryOnCommitFailure();
390 #endif // BUILDFLAG(IS_WIN)
391
392 } // namespace partition_alloc
393
394 #endif // PARTITION_ALLOC_PAGE_ALLOCATOR_H_
395