xref: /aosp_15_r20/external/crosvm/swap/src/userfaultfd.rs (revision bb4ee6a4ae7042d18b07a98463b9c8b875e44b39)
1*bb4ee6a4SAndroid Build Coastguard Worker // Copyright 2022 The ChromiumOS Authors
2*bb4ee6a4SAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be
3*bb4ee6a4SAndroid Build Coastguard Worker // found in the LICENSE file.
4*bb4ee6a4SAndroid Build Coastguard Worker 
5*bb4ee6a4SAndroid Build Coastguard Worker //! Provides wrapper of userfaultfd crate for vmm-swap feature.
6*bb4ee6a4SAndroid Build Coastguard Worker 
7*bb4ee6a4SAndroid Build Coastguard Worker #![deny(missing_docs)]
8*bb4ee6a4SAndroid Build Coastguard Worker 
9*bb4ee6a4SAndroid Build Coastguard Worker use std::convert::From;
10*bb4ee6a4SAndroid Build Coastguard Worker use std::fs::File;
11*bb4ee6a4SAndroid Build Coastguard Worker use std::fs::OpenOptions;
12*bb4ee6a4SAndroid Build Coastguard Worker use std::ops::Range;
13*bb4ee6a4SAndroid Build Coastguard Worker use std::os::unix::io::AsRawFd;
14*bb4ee6a4SAndroid Build Coastguard Worker use std::os::unix::prelude::FromRawFd;
15*bb4ee6a4SAndroid Build Coastguard Worker use std::os::unix::prelude::OpenOptionsExt;
16*bb4ee6a4SAndroid Build Coastguard Worker 
17*bb4ee6a4SAndroid Build Coastguard Worker use anyhow::Context;
18*bb4ee6a4SAndroid Build Coastguard Worker use base::errno_result;
19*bb4ee6a4SAndroid Build Coastguard Worker use base::info;
20*bb4ee6a4SAndroid Build Coastguard Worker use base::ioctl_io_nr;
21*bb4ee6a4SAndroid Build Coastguard Worker use base::ioctl_iowr_nr;
22*bb4ee6a4SAndroid Build Coastguard Worker use base::ioctl_with_mut_ref;
23*bb4ee6a4SAndroid Build Coastguard Worker use base::ioctl_with_val;
24*bb4ee6a4SAndroid Build Coastguard Worker use base::linux::MemoryMappingUnix;
25*bb4ee6a4SAndroid Build Coastguard Worker use base::AsRawDescriptor;
26*bb4ee6a4SAndroid Build Coastguard Worker use base::AsRawDescriptors;
27*bb4ee6a4SAndroid Build Coastguard Worker use base::FromRawDescriptor;
28*bb4ee6a4SAndroid Build Coastguard Worker use base::IntoRawDescriptor;
29*bb4ee6a4SAndroid Build Coastguard Worker use base::MappedRegion;
30*bb4ee6a4SAndroid Build Coastguard Worker use base::MemoryMapping;
31*bb4ee6a4SAndroid Build Coastguard Worker use base::MemoryMappingBuilder;
32*bb4ee6a4SAndroid Build Coastguard Worker use base::RawDescriptor;
33*bb4ee6a4SAndroid Build Coastguard Worker use thiserror::Error as ThisError;
34*bb4ee6a4SAndroid Build Coastguard Worker use userfaultfd::Error as UffdError;
35*bb4ee6a4SAndroid Build Coastguard Worker pub use userfaultfd::Event as UffdEvent;
36*bb4ee6a4SAndroid Build Coastguard Worker use userfaultfd::FeatureFlags;
37*bb4ee6a4SAndroid Build Coastguard Worker use userfaultfd::IoctlFlags;
38*bb4ee6a4SAndroid Build Coastguard Worker use userfaultfd::Uffd;
39*bb4ee6a4SAndroid Build Coastguard Worker use userfaultfd::UffdBuilder;
40*bb4ee6a4SAndroid Build Coastguard Worker 
41*bb4ee6a4SAndroid Build Coastguard Worker use crate::pagesize::pages_to_bytes;
42*bb4ee6a4SAndroid Build Coastguard Worker 
43*bb4ee6a4SAndroid Build Coastguard Worker const DEV_USERFAULTFD_PATH: &str = "/dev/userfaultfd";
44*bb4ee6a4SAndroid Build Coastguard Worker const USERFAULTFD_IOC: u32 = 0xAA;
45*bb4ee6a4SAndroid Build Coastguard Worker ioctl_io_nr!(USERFAULTFD_IOC_NEW, USERFAULTFD_IOC, 0x00);
46*bb4ee6a4SAndroid Build Coastguard Worker ioctl_iowr_nr!(
47*bb4ee6a4SAndroid Build Coastguard Worker     UFFDIO_API,
48*bb4ee6a4SAndroid Build Coastguard Worker     userfaultfd_sys::UFFDIO,
49*bb4ee6a4SAndroid Build Coastguard Worker     userfaultfd_sys::_UFFDIO_API,
50*bb4ee6a4SAndroid Build Coastguard Worker     userfaultfd_sys::uffdio_api
51*bb4ee6a4SAndroid Build Coastguard Worker );
52*bb4ee6a4SAndroid Build Coastguard Worker 
53*bb4ee6a4SAndroid Build Coastguard Worker /// Result for Userfaultfd
54*bb4ee6a4SAndroid Build Coastguard Worker pub type Result<T> = std::result::Result<T, Error>;
55*bb4ee6a4SAndroid Build Coastguard Worker 
56*bb4ee6a4SAndroid Build Coastguard Worker /// Errors for Userfaultfd
57*bb4ee6a4SAndroid Build Coastguard Worker #[derive(ThisError, Debug)]
58*bb4ee6a4SAndroid Build Coastguard Worker pub enum Error {
59*bb4ee6a4SAndroid Build Coastguard Worker     #[error("userfaultfd error: {0:?}")]
60*bb4ee6a4SAndroid Build Coastguard Worker     /// unrecoverable userfaultfd error.
61*bb4ee6a4SAndroid Build Coastguard Worker     Userfaultfd(UffdError),
62*bb4ee6a4SAndroid Build Coastguard Worker     #[error("copy partially succeeded: {0:?} bytes copied")]
63*bb4ee6a4SAndroid Build Coastguard Worker     /// UFFDIO_COPY partillay succeed.
64*bb4ee6a4SAndroid Build Coastguard Worker     PartiallyCopied(usize),
65*bb4ee6a4SAndroid Build Coastguard Worker     #[error("the page is already filled")]
66*bb4ee6a4SAndroid Build Coastguard Worker     /// The page is already filled.
67*bb4ee6a4SAndroid Build Coastguard Worker     PageExist,
68*bb4ee6a4SAndroid Build Coastguard Worker     #[error("the uffd in the corresponding process is already closed")]
69*bb4ee6a4SAndroid Build Coastguard Worker     /// The corresponding process is already dead or has run exec(2).
70*bb4ee6a4SAndroid Build Coastguard Worker     UffdClosed,
71*bb4ee6a4SAndroid Build Coastguard Worker     #[error("clone error: {0:?}")]
72*bb4ee6a4SAndroid Build Coastguard Worker     /// Failed to clone userfaultfd.
73*bb4ee6a4SAndroid Build Coastguard Worker     Clone(base::Error),
74*bb4ee6a4SAndroid Build Coastguard Worker }
75*bb4ee6a4SAndroid Build Coastguard Worker 
76*bb4ee6a4SAndroid Build Coastguard Worker impl From<UffdError> for Error {
from(e: UffdError) -> Self77*bb4ee6a4SAndroid Build Coastguard Worker     fn from(e: UffdError) -> Self {
78*bb4ee6a4SAndroid Build Coastguard Worker         match e {
79*bb4ee6a4SAndroid Build Coastguard Worker             UffdError::PartiallyCopied(copied) => Self::PartiallyCopied(copied),
80*bb4ee6a4SAndroid Build Coastguard Worker             UffdError::CopyFailed(errno) if errno as i32 == libc::ESRCH => Self::UffdClosed,
81*bb4ee6a4SAndroid Build Coastguard Worker             UffdError::ZeropageFailed(errno) if errno as i32 == libc::EEXIST => Self::PageExist,
82*bb4ee6a4SAndroid Build Coastguard Worker             UffdError::ZeropageFailed(errno) if errno as i32 == libc::ESRCH => Self::UffdClosed,
83*bb4ee6a4SAndroid Build Coastguard Worker             other => Self::Userfaultfd(other),
84*bb4ee6a4SAndroid Build Coastguard Worker         }
85*bb4ee6a4SAndroid Build Coastguard Worker     }
86*bb4ee6a4SAndroid Build Coastguard Worker }
87*bb4ee6a4SAndroid Build Coastguard Worker 
88*bb4ee6a4SAndroid Build Coastguard Worker /// Register all the regions to all the userfaultfd
89*bb4ee6a4SAndroid Build Coastguard Worker ///
90*bb4ee6a4SAndroid Build Coastguard Worker /// # Arguments
91*bb4ee6a4SAndroid Build Coastguard Worker ///
92*bb4ee6a4SAndroid Build Coastguard Worker /// * `regions` - the list of address range of regions.
93*bb4ee6a4SAndroid Build Coastguard Worker /// * `uffds` - the reference to the list of [Userfaultfd] for all the processes which may touch the
94*bb4ee6a4SAndroid Build Coastguard Worker ///   `address_range` to be registered.
95*bb4ee6a4SAndroid Build Coastguard Worker ///
96*bb4ee6a4SAndroid Build Coastguard Worker /// # Safety
97*bb4ee6a4SAndroid Build Coastguard Worker ///
98*bb4ee6a4SAndroid Build Coastguard Worker /// Each address range in `regions` must be from guest memory.
99*bb4ee6a4SAndroid Build Coastguard Worker ///
100*bb4ee6a4SAndroid Build Coastguard Worker /// The `uffds` must cover all the processes which may touch the `address_range`. otherwise some
101*bb4ee6a4SAndroid Build Coastguard Worker /// pages are zeroed by kernel on the unregistered process instead of swapping in from the swap
102*bb4ee6a4SAndroid Build Coastguard Worker /// file.
103*bb4ee6a4SAndroid Build Coastguard Worker #[deny(unsafe_op_in_unsafe_fn)]
register_regions(regions: &[Range<usize>], uffds: &[Userfaultfd]) -> Result<()>104*bb4ee6a4SAndroid Build Coastguard Worker pub unsafe fn register_regions(regions: &[Range<usize>], uffds: &[Userfaultfd]) -> Result<()> {
105*bb4ee6a4SAndroid Build Coastguard Worker     for address_range in regions {
106*bb4ee6a4SAndroid Build Coastguard Worker         for uffd in uffds {
107*bb4ee6a4SAndroid Build Coastguard Worker             // SAFETY:
108*bb4ee6a4SAndroid Build Coastguard Worker             // Safe because the range is from the guest memory region.
109*bb4ee6a4SAndroid Build Coastguard Worker             let result = unsafe {
110*bb4ee6a4SAndroid Build Coastguard Worker                 uffd.register(address_range.start, address_range.end - address_range.start)
111*bb4ee6a4SAndroid Build Coastguard Worker             };
112*bb4ee6a4SAndroid Build Coastguard Worker             match result {
113*bb4ee6a4SAndroid Build Coastguard Worker                 Ok(_) => {}
114*bb4ee6a4SAndroid Build Coastguard Worker                 // Skip the userfaultfd for dead processes.
115*bb4ee6a4SAndroid Build Coastguard Worker                 Err(Error::UffdClosed) => {}
116*bb4ee6a4SAndroid Build Coastguard Worker                 Err(e) => {
117*bb4ee6a4SAndroid Build Coastguard Worker                     return Err(e);
118*bb4ee6a4SAndroid Build Coastguard Worker                 }
119*bb4ee6a4SAndroid Build Coastguard Worker             };
120*bb4ee6a4SAndroid Build Coastguard Worker         }
121*bb4ee6a4SAndroid Build Coastguard Worker     }
122*bb4ee6a4SAndroid Build Coastguard Worker     Ok(())
123*bb4ee6a4SAndroid Build Coastguard Worker }
124*bb4ee6a4SAndroid Build Coastguard Worker 
125*bb4ee6a4SAndroid Build Coastguard Worker /// Unregister all the regions from all the userfaultfd.
126*bb4ee6a4SAndroid Build Coastguard Worker ///
127*bb4ee6a4SAndroid Build Coastguard Worker /// `UFFDIO_UNREGISTER` unblocks any threads currently waiting on the region and remove page fault
128*bb4ee6a4SAndroid Build Coastguard Worker /// events on the region from the userfaultfd event queue.
129*bb4ee6a4SAndroid Build Coastguard Worker ///
130*bb4ee6a4SAndroid Build Coastguard Worker /// # Arguments
131*bb4ee6a4SAndroid Build Coastguard Worker ///
132*bb4ee6a4SAndroid Build Coastguard Worker /// * `regions` - the list of address range of regions.
133*bb4ee6a4SAndroid Build Coastguard Worker /// * `uffds` - the reference to the list of registered [Userfaultfd].
unregister_regions(regions: &[Range<usize>], uffds: &[Userfaultfd]) -> Result<()>134*bb4ee6a4SAndroid Build Coastguard Worker pub fn unregister_regions(regions: &[Range<usize>], uffds: &[Userfaultfd]) -> Result<()> {
135*bb4ee6a4SAndroid Build Coastguard Worker     for address_range in regions {
136*bb4ee6a4SAndroid Build Coastguard Worker         for uffd in uffds {
137*bb4ee6a4SAndroid Build Coastguard Worker             let result =
138*bb4ee6a4SAndroid Build Coastguard Worker                 uffd.unregister(address_range.start, address_range.end - address_range.start);
139*bb4ee6a4SAndroid Build Coastguard Worker             match result {
140*bb4ee6a4SAndroid Build Coastguard Worker                 Ok(_) => {}
141*bb4ee6a4SAndroid Build Coastguard Worker                 // Skip the userfaultfd for dead processes.
142*bb4ee6a4SAndroid Build Coastguard Worker                 Err(Error::UffdClosed) => {}
143*bb4ee6a4SAndroid Build Coastguard Worker                 Err(e) => {
144*bb4ee6a4SAndroid Build Coastguard Worker                     return Err(e);
145*bb4ee6a4SAndroid Build Coastguard Worker                 }
146*bb4ee6a4SAndroid Build Coastguard Worker             };
147*bb4ee6a4SAndroid Build Coastguard Worker         }
148*bb4ee6a4SAndroid Build Coastguard Worker     }
149*bb4ee6a4SAndroid Build Coastguard Worker     Ok(())
150*bb4ee6a4SAndroid Build Coastguard Worker }
151*bb4ee6a4SAndroid Build Coastguard Worker 
/// Factory for [Userfaultfd].
///
/// When `/dev/userfaultfd` (available since Linux 6.1) could be opened, new userfaultfds are
/// created through that device file. Otherwise they are created with `userfaultfd(2)`.
pub struct Factory {
    // Handle to `/dev/userfaultfd`, or `None` when the device file could not be opened.
    dev_file: Option<File>,
}
159*bb4ee6a4SAndroid Build Coastguard Worker 
160*bb4ee6a4SAndroid Build Coastguard Worker impl Default for Factory {
default() -> Self161*bb4ee6a4SAndroid Build Coastguard Worker     fn default() -> Self {
162*bb4ee6a4SAndroid Build Coastguard Worker         Self::new()
163*bb4ee6a4SAndroid Build Coastguard Worker     }
164*bb4ee6a4SAndroid Build Coastguard Worker }
165*bb4ee6a4SAndroid Build Coastguard Worker 
166*bb4ee6a4SAndroid Build Coastguard Worker impl Factory {
167*bb4ee6a4SAndroid Build Coastguard Worker     /// Create [Factory] and try open `/dev/userfaultfd`.
168*bb4ee6a4SAndroid Build Coastguard Worker     ///
169*bb4ee6a4SAndroid Build Coastguard Worker     /// If it fails to open `/dev/userfaultfd`, userfaultfd creation fallback to `userfaultfd(2)`
170*bb4ee6a4SAndroid Build Coastguard Worker     /// syscall.
new() -> Self171*bb4ee6a4SAndroid Build Coastguard Worker     pub fn new() -> Self {
172*bb4ee6a4SAndroid Build Coastguard Worker         let dev_file = OpenOptions::new()
173*bb4ee6a4SAndroid Build Coastguard Worker             .read(true)
174*bb4ee6a4SAndroid Build Coastguard Worker             .custom_flags(libc::O_CLOEXEC | libc::O_NONBLOCK)
175*bb4ee6a4SAndroid Build Coastguard Worker             .open(DEV_USERFAULTFD_PATH);
176*bb4ee6a4SAndroid Build Coastguard Worker         match dev_file {
177*bb4ee6a4SAndroid Build Coastguard Worker             Ok(dev_file) => Self {
178*bb4ee6a4SAndroid Build Coastguard Worker                 dev_file: Some(dev_file),
179*bb4ee6a4SAndroid Build Coastguard Worker             },
180*bb4ee6a4SAndroid Build Coastguard Worker             Err(e) => {
181*bb4ee6a4SAndroid Build Coastguard Worker                 info!(
182*bb4ee6a4SAndroid Build Coastguard Worker                     "Failed to open /dev/userfaultfd ({:?}), will fall back to userfaultfd(2)",
183*bb4ee6a4SAndroid Build Coastguard Worker                     e
184*bb4ee6a4SAndroid Build Coastguard Worker                 );
185*bb4ee6a4SAndroid Build Coastguard Worker                 Self { dev_file: None }
186*bb4ee6a4SAndroid Build Coastguard Worker             }
187*bb4ee6a4SAndroid Build Coastguard Worker         }
188*bb4ee6a4SAndroid Build Coastguard Worker     }
189*bb4ee6a4SAndroid Build Coastguard Worker 
190*bb4ee6a4SAndroid Build Coastguard Worker     /// Creates a new [Userfaultfd] for this process.
create(&self) -> anyhow::Result<Userfaultfd>191*bb4ee6a4SAndroid Build Coastguard Worker     pub fn create(&self) -> anyhow::Result<Userfaultfd> {
192*bb4ee6a4SAndroid Build Coastguard Worker         if let Some(dev_file) = &self.dev_file {
193*bb4ee6a4SAndroid Build Coastguard Worker             // SAFETY:
194*bb4ee6a4SAndroid Build Coastguard Worker             // Safe because ioctl(2) USERFAULTFD_IOC_NEW with does not change Rust memory safety.
195*bb4ee6a4SAndroid Build Coastguard Worker             let res = unsafe {
196*bb4ee6a4SAndroid Build Coastguard Worker                 ioctl_with_val(
197*bb4ee6a4SAndroid Build Coastguard Worker                     dev_file,
198*bb4ee6a4SAndroid Build Coastguard Worker                     USERFAULTFD_IOC_NEW,
199*bb4ee6a4SAndroid Build Coastguard Worker                     (libc::O_CLOEXEC | libc::O_NONBLOCK) as libc::c_ulong,
200*bb4ee6a4SAndroid Build Coastguard Worker                 )
201*bb4ee6a4SAndroid Build Coastguard Worker             };
202*bb4ee6a4SAndroid Build Coastguard Worker             let uffd = if res < 0 {
203*bb4ee6a4SAndroid Build Coastguard Worker                 return errno_result().context("USERFAULTFD_IOC_NEW");
204*bb4ee6a4SAndroid Build Coastguard Worker             } else {
205*bb4ee6a4SAndroid Build Coastguard Worker                 // Safe because the uffd is not owned by anyone in this process.
206*bb4ee6a4SAndroid Build Coastguard Worker                 // SAFETY:
207*bb4ee6a4SAndroid Build Coastguard Worker                 unsafe { Userfaultfd::from_raw_descriptor(res) }
208*bb4ee6a4SAndroid Build Coastguard Worker             };
209*bb4ee6a4SAndroid Build Coastguard Worker             let mut api = userfaultfd_sys::uffdio_api {
210*bb4ee6a4SAndroid Build Coastguard Worker                 api: userfaultfd_sys::UFFD_API,
211*bb4ee6a4SAndroid Build Coastguard Worker                 features: (FeatureFlags::MISSING_SHMEM | FeatureFlags::EVENT_REMOVE).bits(),
212*bb4ee6a4SAndroid Build Coastguard Worker                 ioctls: 0,
213*bb4ee6a4SAndroid Build Coastguard Worker             };
214*bb4ee6a4SAndroid Build Coastguard Worker             // SAFETY:
215*bb4ee6a4SAndroid Build Coastguard Worker             // Safe because ioctl(2) UFFDIO_API with does not change Rust memory safety.
216*bb4ee6a4SAndroid Build Coastguard Worker             let res = unsafe { ioctl_with_mut_ref(&uffd, UFFDIO_API, &mut api) };
217*bb4ee6a4SAndroid Build Coastguard Worker             if res < 0 {
218*bb4ee6a4SAndroid Build Coastguard Worker                 errno_result().context("UFFDIO_API")
219*bb4ee6a4SAndroid Build Coastguard Worker             } else {
220*bb4ee6a4SAndroid Build Coastguard Worker                 Ok(uffd)
221*bb4ee6a4SAndroid Build Coastguard Worker             }
222*bb4ee6a4SAndroid Build Coastguard Worker         } else {
223*bb4ee6a4SAndroid Build Coastguard Worker             Userfaultfd::new().context("create userfaultfd")
224*bb4ee6a4SAndroid Build Coastguard Worker         }
225*bb4ee6a4SAndroid Build Coastguard Worker     }
226*bb4ee6a4SAndroid Build Coastguard Worker 
227*bb4ee6a4SAndroid Build Coastguard Worker     /// Create a new [Factory] object.
try_clone(&self) -> anyhow::Result<Self>228*bb4ee6a4SAndroid Build Coastguard Worker     pub fn try_clone(&self) -> anyhow::Result<Self> {
229*bb4ee6a4SAndroid Build Coastguard Worker         let dev_file = self.dev_file.as_ref().map(File::try_clone).transpose()?;
230*bb4ee6a4SAndroid Build Coastguard Worker         Ok(Self { dev_file })
231*bb4ee6a4SAndroid Build Coastguard Worker     }
232*bb4ee6a4SAndroid Build Coastguard Worker }
233*bb4ee6a4SAndroid Build Coastguard Worker 
234*bb4ee6a4SAndroid Build Coastguard Worker impl AsRawDescriptors for Factory {
as_raw_descriptors(&self) -> Vec<RawDescriptor>235*bb4ee6a4SAndroid Build Coastguard Worker     fn as_raw_descriptors(&self) -> Vec<RawDescriptor> {
236*bb4ee6a4SAndroid Build Coastguard Worker         if let Some(dev_file) = &self.dev_file {
237*bb4ee6a4SAndroid Build Coastguard Worker             vec![dev_file.as_raw_descriptor()]
238*bb4ee6a4SAndroid Build Coastguard Worker         } else {
239*bb4ee6a4SAndroid Build Coastguard Worker             Vec::new()
240*bb4ee6a4SAndroid Build Coastguard Worker         }
241*bb4ee6a4SAndroid Build Coastguard Worker     }
242*bb4ee6a4SAndroid Build Coastguard Worker }
243*bb4ee6a4SAndroid Build Coastguard Worker 
244*bb4ee6a4SAndroid Build Coastguard Worker /// Wrapper for [`userfaultfd::Uffd`] to be used in the vmm-swap feature.
245*bb4ee6a4SAndroid Build Coastguard Worker ///
246*bb4ee6a4SAndroid Build Coastguard Worker /// # Safety
247*bb4ee6a4SAndroid Build Coastguard Worker ///
248*bb4ee6a4SAndroid Build Coastguard Worker /// The userfaultfd operations (`UFFDIO_COPY` and `UFFDIO_ZEROPAGE`) looks unsafe since it fills a
249*bb4ee6a4SAndroid Build Coastguard Worker /// memory content directly. But they actually are not unsafe operation but `UFFDIO_REGISTER` should
250*bb4ee6a4SAndroid Build Coastguard Worker /// be the unsafe operation for Rust memory safety.
251*bb4ee6a4SAndroid Build Coastguard Worker ///
252*bb4ee6a4SAndroid Build Coastguard Worker /// According to [the Rust document](https://doc.rust-lang.org/nomicon/uninitialized.html),
253*bb4ee6a4SAndroid Build Coastguard Worker ///
254*bb4ee6a4SAndroid Build Coastguard Worker /// > All runtime-allocated memory in a Rust program begins its life as uninitialized.
255*bb4ee6a4SAndroid Build Coastguard Worker ///
256*bb4ee6a4SAndroid Build Coastguard Worker /// The userfaultfd operations actually does not change/overwrite the existing memory contents but
257*bb4ee6a4SAndroid Build Coastguard Worker /// they just setup the "uninitialized" pages. If the page was already initialized, the userfaultfd
258*bb4ee6a4SAndroid Build Coastguard Worker /// operations fail and return EEXIST error (which is not documented unfortunately). So they
259*bb4ee6a4SAndroid Build Coastguard Worker /// originally does not affect the Rust memory safety.
260*bb4ee6a4SAndroid Build Coastguard Worker ///
261*bb4ee6a4SAndroid Build Coastguard Worker /// The "uninitialized" page in this context has 2 patterns:
262*bb4ee6a4SAndroid Build Coastguard Worker ///
263*bb4ee6a4SAndroid Build Coastguard Worker /// 1. pages which is never touched or,
264*bb4ee6a4SAndroid Build Coastguard Worker /// 2. pages which is never touched after MADV_REMOVE
265*bb4ee6a4SAndroid Build Coastguard Worker ///
266*bb4ee6a4SAndroid Build Coastguard Worker /// Filling the (1) pages with any contents should not affect the Rust memory safety.
267*bb4ee6a4SAndroid Build Coastguard Worker ///
268*bb4ee6a4SAndroid Build Coastguard Worker /// Filling the (2) pages potentially may break the memory used by Rust. But the safety should be
269*bb4ee6a4SAndroid Build Coastguard Worker /// examined at `MADV_REMOVE` and `UFFDIO_REGISTER` timing.
270*bb4ee6a4SAndroid Build Coastguard Worker #[derive(Debug)]
271*bb4ee6a4SAndroid Build Coastguard Worker pub struct Userfaultfd {
272*bb4ee6a4SAndroid Build Coastguard Worker     uffd: Uffd,
273*bb4ee6a4SAndroid Build Coastguard Worker }
274*bb4ee6a4SAndroid Build Coastguard Worker 
275*bb4ee6a4SAndroid Build Coastguard Worker impl Userfaultfd {
276*bb4ee6a4SAndroid Build Coastguard Worker     /// Creates a new userfaultfd using userfaultfd(2) syscall.
277*bb4ee6a4SAndroid Build Coastguard Worker     ///
278*bb4ee6a4SAndroid Build Coastguard Worker     /// This is public for tests.
new() -> Result<Self>279*bb4ee6a4SAndroid Build Coastguard Worker     pub fn new() -> Result<Self> {
280*bb4ee6a4SAndroid Build Coastguard Worker         let uffd = UffdBuilder::new()
281*bb4ee6a4SAndroid Build Coastguard Worker             .close_on_exec(true)
282*bb4ee6a4SAndroid Build Coastguard Worker             .non_blocking(true)
283*bb4ee6a4SAndroid Build Coastguard Worker             .user_mode_only(false)
284*bb4ee6a4SAndroid Build Coastguard Worker             .require_features(FeatureFlags::MISSING_SHMEM | FeatureFlags::EVENT_REMOVE)
285*bb4ee6a4SAndroid Build Coastguard Worker             .create()?;
286*bb4ee6a4SAndroid Build Coastguard Worker         Ok(Self { uffd })
287*bb4ee6a4SAndroid Build Coastguard Worker     }
288*bb4ee6a4SAndroid Build Coastguard Worker 
289*bb4ee6a4SAndroid Build Coastguard Worker     /// Register a range of memory to the userfaultfd.
290*bb4ee6a4SAndroid Build Coastguard Worker     ///
291*bb4ee6a4SAndroid Build Coastguard Worker     /// After this registration, any page faults on the range will be caught by the userfaultfd.
292*bb4ee6a4SAndroid Build Coastguard Worker     ///
293*bb4ee6a4SAndroid Build Coastguard Worker     /// # Arguments
294*bb4ee6a4SAndroid Build Coastguard Worker     ///
295*bb4ee6a4SAndroid Build Coastguard Worker     /// * `addr` - the starting address of the range of memory.
296*bb4ee6a4SAndroid Build Coastguard Worker     /// * `len` - the length in bytes of the range of memory.
297*bb4ee6a4SAndroid Build Coastguard Worker     ///
298*bb4ee6a4SAndroid Build Coastguard Worker     /// # Safety
299*bb4ee6a4SAndroid Build Coastguard Worker     ///
300*bb4ee6a4SAndroid Build Coastguard Worker     /// [addr, addr+len) must lie within a [MemoryMapping], and that mapping
301*bb4ee6a4SAndroid Build Coastguard Worker     /// must live for the lifespan of the userfaultfd kernel object (which may be distinct from the
302*bb4ee6a4SAndroid Build Coastguard Worker     /// `Userfaultfd` rust object in this process).
register(&self, addr: usize, len: usize) -> Result<IoctlFlags>303*bb4ee6a4SAndroid Build Coastguard Worker     pub unsafe fn register(&self, addr: usize, len: usize) -> Result<IoctlFlags> {
304*bb4ee6a4SAndroid Build Coastguard Worker         match self.uffd.register(addr as *mut libc::c_void, len) {
305*bb4ee6a4SAndroid Build Coastguard Worker             Ok(flags) => Ok(flags),
306*bb4ee6a4SAndroid Build Coastguard Worker             Err(UffdError::SystemError(errno)) if errno as i32 == libc::ENOMEM => {
307*bb4ee6a4SAndroid Build Coastguard Worker                 // Userfaultfd returns `ENOMEM` if the corresponding process dies or run as another
308*bb4ee6a4SAndroid Build Coastguard Worker                 // program by `exec` system call.
309*bb4ee6a4SAndroid Build Coastguard Worker                 // TODO(b/267124393): Verify UFFDIO_ZEROPAGE + ESRCH as well since ENOMEM may be for
310*bb4ee6a4SAndroid Build Coastguard Worker                 // other reasons.
311*bb4ee6a4SAndroid Build Coastguard Worker                 Err(Error::UffdClosed)
312*bb4ee6a4SAndroid Build Coastguard Worker             }
313*bb4ee6a4SAndroid Build Coastguard Worker             Err(e) => Err(e.into()),
314*bb4ee6a4SAndroid Build Coastguard Worker         }
315*bb4ee6a4SAndroid Build Coastguard Worker     }
316*bb4ee6a4SAndroid Build Coastguard Worker 
317*bb4ee6a4SAndroid Build Coastguard Worker     /// Unregister a range of memory from the userfaultfd.
318*bb4ee6a4SAndroid Build Coastguard Worker     ///
319*bb4ee6a4SAndroid Build Coastguard Worker     /// # Arguments
320*bb4ee6a4SAndroid Build Coastguard Worker     ///
321*bb4ee6a4SAndroid Build Coastguard Worker     /// * `addr` - the starting address of the range of memory.
322*bb4ee6a4SAndroid Build Coastguard Worker     /// * `len` - the length in bytes of the range of memory.
unregister(&self, addr: usize, len: usize) -> Result<()>323*bb4ee6a4SAndroid Build Coastguard Worker     pub fn unregister(&self, addr: usize, len: usize) -> Result<()> {
324*bb4ee6a4SAndroid Build Coastguard Worker         match self.uffd.unregister(addr as *mut libc::c_void, len) {
325*bb4ee6a4SAndroid Build Coastguard Worker             Ok(_) => Ok(()),
326*bb4ee6a4SAndroid Build Coastguard Worker             Err(UffdError::SystemError(errno)) if errno as i32 == libc::ENOMEM => {
327*bb4ee6a4SAndroid Build Coastguard Worker                 // Userfaultfd returns `ENOMEM` if the corresponding process dies or run as another
328*bb4ee6a4SAndroid Build Coastguard Worker                 // program by `exec` system call.
329*bb4ee6a4SAndroid Build Coastguard Worker                 // TODO(b/267124393): Verify UFFDIO_ZEROPAGE + ESRCH as well since ENOMEM may be for
330*bb4ee6a4SAndroid Build Coastguard Worker                 // other reasons.
331*bb4ee6a4SAndroid Build Coastguard Worker                 Err(Error::UffdClosed)
332*bb4ee6a4SAndroid Build Coastguard Worker             }
333*bb4ee6a4SAndroid Build Coastguard Worker             Err(e) => Err(e.into()),
334*bb4ee6a4SAndroid Build Coastguard Worker         }
335*bb4ee6a4SAndroid Build Coastguard Worker     }
336*bb4ee6a4SAndroid Build Coastguard Worker 
337*bb4ee6a4SAndroid Build Coastguard Worker     /// Initialize page(s) and fill it with zero.
338*bb4ee6a4SAndroid Build Coastguard Worker     ///
339*bb4ee6a4SAndroid Build Coastguard Worker     /// # Arguments
340*bb4ee6a4SAndroid Build Coastguard Worker     ///
341*bb4ee6a4SAndroid Build Coastguard Worker     /// * `addr` - the starting address of the page(s) to be initialzed with zero.
342*bb4ee6a4SAndroid Build Coastguard Worker     /// * `len` - the length in bytes of the page(s).
343*bb4ee6a4SAndroid Build Coastguard Worker     /// * `wake` - whether or not to unblock the faulting thread.
zero(&self, addr: usize, len: usize, wake: bool) -> Result<usize>344*bb4ee6a4SAndroid Build Coastguard Worker     pub fn zero(&self, addr: usize, len: usize, wake: bool) -> Result<usize> {
345*bb4ee6a4SAndroid Build Coastguard Worker         // SAFETY:
346*bb4ee6a4SAndroid Build Coastguard Worker         // safe because zeroing untouched pages does not break the Rust memory safety since "All
347*bb4ee6a4SAndroid Build Coastguard Worker         // runtime-allocated memory in a Rust program begins its life as uninitialized."
348*bb4ee6a4SAndroid Build Coastguard Worker         // https://doc.rust-lang.org/nomicon/uninitialized.html
349*bb4ee6a4SAndroid Build Coastguard Worker         Ok(unsafe { self.uffd.zeropage(addr as *mut libc::c_void, len, wake) }?)
350*bb4ee6a4SAndroid Build Coastguard Worker     }
351*bb4ee6a4SAndroid Build Coastguard Worker 
352*bb4ee6a4SAndroid Build Coastguard Worker     /// Copy the `data` to the page(s) starting from `addr`.
353*bb4ee6a4SAndroid Build Coastguard Worker     ///
354*bb4ee6a4SAndroid Build Coastguard Worker     /// # Arguments
355*bb4ee6a4SAndroid Build Coastguard Worker     ///
356*bb4ee6a4SAndroid Build Coastguard Worker     /// * `addr` - the starting address of the page(s) to be initialzed with data.
357*bb4ee6a4SAndroid Build Coastguard Worker     /// * `len` - the length in bytes of the page(s).
358*bb4ee6a4SAndroid Build Coastguard Worker     /// * `data` - the starting address of the content.
359*bb4ee6a4SAndroid Build Coastguard Worker     /// * `wake` - whether or not to unblock the faulting thread.
copy(&self, addr: usize, len: usize, data: *const u8, wake: bool) -> Result<usize>360*bb4ee6a4SAndroid Build Coastguard Worker     pub fn copy(&self, addr: usize, len: usize, data: *const u8, wake: bool) -> Result<usize> {
361*bb4ee6a4SAndroid Build Coastguard Worker         Ok(
362*bb4ee6a4SAndroid Build Coastguard Worker             // SAFETY:
363*bb4ee6a4SAndroid Build Coastguard Worker             // safe because filling untouched pages with data does not break the Rust memory safety
364*bb4ee6a4SAndroid Build Coastguard Worker             // since "All runtime-allocated memory in a Rust program begins its life as
365*bb4ee6a4SAndroid Build Coastguard Worker             // uninitialized." https://doc.rust-lang.org/nomicon/uninitialized.html
366*bb4ee6a4SAndroid Build Coastguard Worker             unsafe {
367*bb4ee6a4SAndroid Build Coastguard Worker                 self.uffd.copy(
368*bb4ee6a4SAndroid Build Coastguard Worker                     data as *const libc::c_void,
369*bb4ee6a4SAndroid Build Coastguard Worker                     addr as *mut libc::c_void,
370*bb4ee6a4SAndroid Build Coastguard Worker                     len,
371*bb4ee6a4SAndroid Build Coastguard Worker                     wake,
372*bb4ee6a4SAndroid Build Coastguard Worker                 )
373*bb4ee6a4SAndroid Build Coastguard Worker             }?,
374*bb4ee6a4SAndroid Build Coastguard Worker         )
375*bb4ee6a4SAndroid Build Coastguard Worker     }
376*bb4ee6a4SAndroid Build Coastguard Worker 
377*bb4ee6a4SAndroid Build Coastguard Worker     /// Wake the faulting thread blocked by the page(s).
378*bb4ee6a4SAndroid Build Coastguard Worker     ///
379*bb4ee6a4SAndroid Build Coastguard Worker     /// If the page is not initialized, the thread causes a page fault again.
380*bb4ee6a4SAndroid Build Coastguard Worker     ///
381*bb4ee6a4SAndroid Build Coastguard Worker     /// # Arguments
382*bb4ee6a4SAndroid Build Coastguard Worker     ///
383*bb4ee6a4SAndroid Build Coastguard Worker     /// * `addr` - the starting address of the page(s).
384*bb4ee6a4SAndroid Build Coastguard Worker     /// * `len` - the length in bytes of the page(s).
wake(&self, addr: usize, len: usize) -> Result<()>385*bb4ee6a4SAndroid Build Coastguard Worker     pub fn wake(&self, addr: usize, len: usize) -> Result<()> {
386*bb4ee6a4SAndroid Build Coastguard Worker         Ok(self.uffd.wake(addr as *mut libc::c_void, len)?)
387*bb4ee6a4SAndroid Build Coastguard Worker     }
388*bb4ee6a4SAndroid Build Coastguard Worker 
389*bb4ee6a4SAndroid Build Coastguard Worker     /// Read an event from the userfaultfd.
390*bb4ee6a4SAndroid Build Coastguard Worker     ///
391*bb4ee6a4SAndroid Build Coastguard Worker     /// Return `None` immediately if no events is ready to read.
read_event(&self) -> Result<Option<UffdEvent>>392*bb4ee6a4SAndroid Build Coastguard Worker     pub fn read_event(&self) -> Result<Option<UffdEvent>> {
393*bb4ee6a4SAndroid Build Coastguard Worker         Ok(self.uffd.read_event()?)
394*bb4ee6a4SAndroid Build Coastguard Worker     }
395*bb4ee6a4SAndroid Build Coastguard Worker 
396*bb4ee6a4SAndroid Build Coastguard Worker     /// Try to clone [Userfaultfd]
try_clone(&self) -> Result<Self>397*bb4ee6a4SAndroid Build Coastguard Worker     pub fn try_clone(&self) -> Result<Self> {
398*bb4ee6a4SAndroid Build Coastguard Worker         let dup_desc = base::clone_descriptor(self).map_err(Error::Clone)?;
399*bb4ee6a4SAndroid Build Coastguard Worker         // SAFETY: no one owns dup_desc.
400*bb4ee6a4SAndroid Build Coastguard Worker         let uffd = Self::from(unsafe { Uffd::from_raw_fd(dup_desc.into_raw_descriptor()) });
401*bb4ee6a4SAndroid Build Coastguard Worker         Ok(uffd)
402*bb4ee6a4SAndroid Build Coastguard Worker     }
403*bb4ee6a4SAndroid Build Coastguard Worker }
404*bb4ee6a4SAndroid Build Coastguard Worker 
405*bb4ee6a4SAndroid Build Coastguard Worker impl From<Uffd> for Userfaultfd {
from(uffd: Uffd) -> Self406*bb4ee6a4SAndroid Build Coastguard Worker     fn from(uffd: Uffd) -> Self {
407*bb4ee6a4SAndroid Build Coastguard Worker         Self { uffd }
408*bb4ee6a4SAndroid Build Coastguard Worker     }
409*bb4ee6a4SAndroid Build Coastguard Worker }
410*bb4ee6a4SAndroid Build Coastguard Worker 
411*bb4ee6a4SAndroid Build Coastguard Worker impl FromRawDescriptor for Userfaultfd {
from_raw_descriptor(descriptor: RawDescriptor) -> Self412*bb4ee6a4SAndroid Build Coastguard Worker     unsafe fn from_raw_descriptor(descriptor: RawDescriptor) -> Self {
413*bb4ee6a4SAndroid Build Coastguard Worker         Self::from(Uffd::from_raw_fd(descriptor))
414*bb4ee6a4SAndroid Build Coastguard Worker     }
415*bb4ee6a4SAndroid Build Coastguard Worker }
416*bb4ee6a4SAndroid Build Coastguard Worker 
417*bb4ee6a4SAndroid Build Coastguard Worker impl AsRawDescriptor for Userfaultfd {
as_raw_descriptor(&self) -> RawDescriptor418*bb4ee6a4SAndroid Build Coastguard Worker     fn as_raw_descriptor(&self) -> RawDescriptor {
419*bb4ee6a4SAndroid Build Coastguard Worker         self.uffd.as_raw_fd()
420*bb4ee6a4SAndroid Build Coastguard Worker     }
421*bb4ee6a4SAndroid Build Coastguard Worker }
422*bb4ee6a4SAndroid Build Coastguard Worker 
/// Check whether the process for the [Userfaultfd] is dead or not.
///
/// All methods take `&self`, so an implementation is usable through a shared reference.
pub trait DeadUffdChecker {
    /// Register the [Userfaultfd] to the checker.
    ///
    /// Returns an error if the registration fails.
    fn register(&self, uffd: &Userfaultfd) -> anyhow::Result<()>;
    /// Check whether the [Userfaultfd] is dead or not.
    ///
    /// Returns `true` if the [Userfaultfd] is considered dead.
    fn is_dead(&self, uffd: &Userfaultfd) -> bool;
    /// Free the internal state.
    ///
    /// Returns an error if the state can not be freed.
    fn reset(&self) -> anyhow::Result<()>;
}
432*bb4ee6a4SAndroid Build Coastguard Worker 
/// Check whether the process for the [Userfaultfd] is dead or not.
///
/// [DeadUffdCheckerImpl] uses `UFFD_ZERO` on a dummy mmap page to check the liveness.
///
/// This must be kept alive in the main process so that the dummy mmap is present in all
/// descendant processes.
pub struct DeadUffdCheckerImpl {
    // The dummy mapping which is registered to each userfaultfd and used as the target of the
    // zero-page probe.
    dummy_mmap: MemoryMapping,
}
442*bb4ee6a4SAndroid Build Coastguard Worker 
443*bb4ee6a4SAndroid Build Coastguard Worker impl DeadUffdCheckerImpl {
444*bb4ee6a4SAndroid Build Coastguard Worker     /// Creates [DeadUffdCheckerImpl].
new() -> anyhow::Result<Self>445*bb4ee6a4SAndroid Build Coastguard Worker     pub fn new() -> anyhow::Result<Self> {
446*bb4ee6a4SAndroid Build Coastguard Worker         Ok(Self {
447*bb4ee6a4SAndroid Build Coastguard Worker             dummy_mmap: MemoryMappingBuilder::new(pages_to_bytes(1))
448*bb4ee6a4SAndroid Build Coastguard Worker                 .build()
449*bb4ee6a4SAndroid Build Coastguard Worker                 .context("create dummy mmap")?,
450*bb4ee6a4SAndroid Build Coastguard Worker         })
451*bb4ee6a4SAndroid Build Coastguard Worker     }
452*bb4ee6a4SAndroid Build Coastguard Worker }
453*bb4ee6a4SAndroid Build Coastguard Worker 
454*bb4ee6a4SAndroid Build Coastguard Worker impl DeadUffdChecker for DeadUffdCheckerImpl {
register(&self, uffd: &Userfaultfd) -> anyhow::Result<()>455*bb4ee6a4SAndroid Build Coastguard Worker     fn register(&self, uffd: &Userfaultfd) -> anyhow::Result<()> {
456*bb4ee6a4SAndroid Build Coastguard Worker         // SAFETY: no one except DeadUffdCheckerImpl access dummy_mmap.
457*bb4ee6a4SAndroid Build Coastguard Worker         unsafe { uffd.register(self.dummy_mmap.as_ptr() as usize, pages_to_bytes(1)) }
458*bb4ee6a4SAndroid Build Coastguard Worker             .map(|_| ())
459*bb4ee6a4SAndroid Build Coastguard Worker             .context("register to dummy mmap")
460*bb4ee6a4SAndroid Build Coastguard Worker     }
461*bb4ee6a4SAndroid Build Coastguard Worker 
is_dead(&self, uffd: &Userfaultfd) -> bool462*bb4ee6a4SAndroid Build Coastguard Worker     fn is_dead(&self, uffd: &Userfaultfd) -> bool {
463*bb4ee6a4SAndroid Build Coastguard Worker         // UFFDIO_ZEROPAGE returns ESRCH for dead uffd.
464*bb4ee6a4SAndroid Build Coastguard Worker         matches!(
465*bb4ee6a4SAndroid Build Coastguard Worker             uffd.zero(self.dummy_mmap.as_ptr() as usize, pages_to_bytes(1), false),
466*bb4ee6a4SAndroid Build Coastguard Worker             Err(Error::UffdClosed)
467*bb4ee6a4SAndroid Build Coastguard Worker         )
468*bb4ee6a4SAndroid Build Coastguard Worker     }
469*bb4ee6a4SAndroid Build Coastguard Worker 
reset(&self) -> anyhow::Result<()>470*bb4ee6a4SAndroid Build Coastguard Worker     fn reset(&self) -> anyhow::Result<()> {
471*bb4ee6a4SAndroid Build Coastguard Worker         self.dummy_mmap
472*bb4ee6a4SAndroid Build Coastguard Worker             .remove_range(0, pages_to_bytes(1))
473*bb4ee6a4SAndroid Build Coastguard Worker             .context("free dummy mmap")
474*bb4ee6a4SAndroid Build Coastguard Worker     }
475*bb4ee6a4SAndroid Build Coastguard Worker }
476