1 // Copyright 2022 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #![deny(missing_docs)]
6
7 use std::fs::read_to_string;
8 use std::num::ParseIntError;
9 use std::path::Path;
10 use std::str::FromStr;
11 use std::thread::sleep;
12 use std::time::Duration;
13
14 use anyhow::anyhow;
15 use anyhow::bail;
16 use anyhow::Context;
17 use anyhow::Result;
18 use base::linux::getpid;
19 use base::linux::kill;
20 use base::linux::Signal;
21 use base::Pid;
22
23 /// Stops all the crosvm device processes during moving the guest memory to the staging memory.
24 ///
25 /// While moving, we must guarantee that no one changes the guest memory contents. This supports
26 /// devices in sandbox mode only.
27 ///
28 /// We stop all the crosvm processes instead of the alternatives.
29 ///
30 /// * Just stop vCPUs
31 /// * devices still may works in the child process and write something to the guest memory.
32 /// * Use write protection of userfaultfd
33 /// * UFFDIO_REGISTER_MODE_WP for shmem is WIP and not supported yet.
34 /// * `devices::Suspendable::sleep()`
35 /// * `Suspendable` is not supported by all devices yet.
36 pub struct ProcessesGuard {
37 pids: Vec<Pid>,
38 }
39
40 /// Stops all crosvm child processes except this monitor process using signals.
41 ///
42 /// The stopped processes are resumed when the freezer object is freed.
43 ///
44 /// This must be called from the main process.
freeze_child_processes(monitor_pid: Pid) -> Result<ProcessesGuard>45 pub fn freeze_child_processes(monitor_pid: Pid) -> Result<ProcessesGuard> {
46 let mut guard = ProcessesGuard {
47 pids: load_descendants(getpid(), monitor_pid)?,
48 };
49
50 for _ in 0..3 {
51 guard.stop_the_world().context("stop the world")?;
52 let pids_after = load_descendants(getpid(), monitor_pid)?;
53 if pids_after == guard.pids {
54 return Ok(guard);
55 }
56 guard.pids = pids_after;
57 }
58
59 bail!("new processes forked while freezing");
60 }
61
62 impl ProcessesGuard {
63 /// Stops all the crosvm processes by sending SIGSTOP signal.
stop_the_world(&self) -> Result<()>64 fn stop_the_world(&self) -> Result<()> {
65 for pid in &self.pids {
66 // SAFETY:
67 // safe because pid in pids are crosvm processes except this monitor process.
68 unsafe { kill(*pid, Signal::Stop as i32) }.context("failed to stop process")?;
69 }
70 for pid in &self.pids {
71 wait_process_stopped(*pid).context("wait process stopped")?;
72 }
73 Ok(())
74 }
75
76 /// Resumes all the crosvm processes by sending SIGCONT signal.
continue_the_world(&self)77 fn continue_the_world(&self) {
78 for pid in &self.pids {
79 // SAFETY:
80 // safe because pid in pids are crosvm processes except this monitor process and
81 // continue signal does not have side effects.
82 // ignore the result because we don't care whether it succeeds.
83 let _ = unsafe { kill(*pid, Signal::Continue as i32) };
84 }
85 }
86 }
87
88 impl Drop for ProcessesGuard {
drop(&mut self)89 fn drop(&mut self) {
90 self.continue_the_world();
91 }
92 }
93
94 /// Loads Pids of crosvm descendant processes except the monitor procesess.
load_descendants(current_pid: Pid, monitor_pid: Pid) -> Result<Vec<Pid>>95 fn load_descendants(current_pid: Pid, monitor_pid: Pid) -> Result<Vec<Pid>> {
96 // children of the current process.
97 let children = read_to_string(format!("/proc/{0}/task/{0}/children", current_pid))
98 .context("read children")?;
99 let children = children.trim();
100 // str::split() to empty string results a iterator just returning 1 empty string.
101 if children.is_empty() {
102 return Ok(Vec::new());
103 }
104 let pids: std::result::Result<Vec<i32>, ParseIntError> = children
105 .split(" ")
106 .map(i32::from_str)
107 // except this monitor process
108 .filter(|pid| match pid {
109 Ok(pid) => *pid != monitor_pid,
110 _ => true,
111 })
112 .collect();
113 let pids = pids.context("parse pids")?;
114 let mut result = Vec::new();
115 for pid in pids {
116 result.push(pid);
117 let pids = load_descendants(pid, monitor_pid)?;
118 result.extend(pids);
119 }
120 Ok(result)
121 }
122
/// Extracts the process state from the content of `/proc/<pid>/stat`.
///
/// The `/proc/<pid>/stat` file contains metadata for the process including the process state.
/// The state is the first character after the `comm` field, which is wrapped in parentheses.
///
/// `comm` is arbitrary text and may itself contain `)` or spaces, so the end of `comm` is
/// located by the LAST `)` in the text rather than the first one. The fields that follow `comm`
/// are a single state character and numbers, so they never contain a `)`.
///
/// Returns `None` if the text contains no `)` or if nothing but spaces follows it.
///
/// See [proc(5)](https://man7.org/linux/man-pages/man5/proc.5.html) for the format.
fn parse_process_state(text: &str) -> Option<char> {
    // skip to the end of "comm"
    let (_, after_comm) = text.rsplit_once(')')?;
    // skip the whitespace between "comm" and "state", then take the state
    after_comm.trim_start_matches(' ').chars().next()
}
150
wait_for_task_stopped(task_path: &Path) -> Result<()>151 fn wait_for_task_stopped(task_path: &Path) -> Result<()> {
152 for _ in 0..10 {
153 let stat = read_to_string(task_path.join("stat")).context("read process status")?;
154 if let Some(state) = parse_process_state(&stat) {
155 if state == 'T' {
156 return Ok(());
157 }
158 }
159 sleep(Duration::from_millis(50));
160 }
161 Err(anyhow!("time out"))
162 }
163
wait_process_stopped(pid: Pid) -> Result<()>164 fn wait_process_stopped(pid: Pid) -> Result<()> {
165 let all_tasks = std::fs::read_dir(format!("/proc/{}/task", pid)).context("read tasks")?;
166 for task in all_tasks {
167 wait_for_task_stopped(&task.context("read task entry")?.path()).context("wait for task")?;
168 }
169 Ok(())
170 }
171
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `parse_process_state` against hand-written `/proc/<pid>/stat` prefixes.
    #[test]
    fn parse_process_state_tests() {
        assert_eq!(parse_process_state("1234 (crosvm) T 0 0 0"), Some('T'));
        assert_eq!(parse_process_state("1234 (crosvm) R 0 0 0"), Some('R'));
        // more than 1 white space
        assert_eq!(parse_process_state("1234 (crosvm)   T 0 0 0"), Some('T'));
        // no white space between comm and state
        assert_eq!(parse_process_state("1234 (crosvm)T 0 0 0"), Some('T'));
        // white space in the comm
        assert_eq!(
            parse_process_state("1234 (crosvm --test) T 0 0 0"),
            Some('T')
        );
        // no status
        assert_eq!(parse_process_state("1234 (crosvm)"), None);
        // no comm at all
        assert_eq!(parse_process_state("1234"), None);
        assert_eq!(parse_process_state(""), None);
    }
}
193