1 //! A library for awaiting and killing child processes from multiple threads. 2 //! 3 //! - [Docs](https://docs.rs/shared_child) 4 //! - [Crate](https://crates.io/crates/shared_child) 5 //! - [Repo](https://github.com/oconnor663/shared_child.rs) 6 //! 7 //! The 8 //! [`std::process::Child`](https://doc.rust-lang.org/std/process/struct.Child.html) 9 //! type in the standard library provides 10 //! [`wait`](https://doc.rust-lang.org/std/process/struct.Child.html#method.wait) 11 //! and 12 //! [`kill`](https://doc.rust-lang.org/std/process/struct.Child.html#method.kill) 13 //! methods that take `&mut self`, making it impossible to kill a child process 14 //! while another thread is waiting on it. That design works around a race 15 //! condition in Unix's `waitpid` function, where a PID might get reused as soon 16 //! as the wait returns, so a signal sent around the same time could 17 //! accidentally get delivered to the wrong process. 18 //! 19 //! However with the newer POSIX `waitid` function, we can wait on a child 20 //! without freeing its PID for reuse. That makes it safe to send signals 21 //! concurrently. Windows has actually always supported this, by preventing PID 22 //! reuse while there are still open handles to a child process. This library 23 //! wraps `std::process::Child` for concurrent use, backed by these APIs. 24 //! 25 //! Compatibility note: The `libc` crate doesn't currently support `waitid` on 26 //! NetBSD or OpenBSD, or on older versions of OSX. There [might also 27 //! be](https://bugs.python.org/msg167016) some version of OSX where the 28 //! `waitid` function exists but is broken. We can add a "best effort" 29 //! workaround using `waitpid` for these platforms as we run into them. Please 30 //! [file an issue](https://github.com/oconnor663/shared_child.rs/issues/new) if 31 //! you hit this. 32 //! 33 //! # Example 34 //! 35 //! ```rust 36 //! use shared_child::SharedChild; 37 //! use std::process::Command; 38 //! use std::sync::Arc; 39 //! 40 //! // Spawn a child that will just sleep for a long time, 41 //! // and put it in an Arc to share between threads. 42 //! let mut command = Command::new("python"); 43 //! command.arg("-c").arg("import time; time.sleep(1000000000)"); 44 //! let shared_child = SharedChild::spawn(&mut command).unwrap(); 45 //! let child_arc = Arc::new(shared_child); 46 //! 47 //! // On another thread, wait on the child process. 48 //! let child_arc_clone = child_arc.clone(); 49 //! let thread = std::thread::spawn(move || { 50 //! child_arc_clone.wait().unwrap() 51 //! }); 52 //! 53 //! // While the other thread is waiting, kill the child process. 54 //! // This wouldn't be possible with e.g. Arc<Mutex<Child>> from 55 //! // the standard library, because the waiting thread would be 56 //! // holding the mutex. 57 //! child_arc.kill().unwrap(); 58 //! 59 //! // Join the waiting thread and get the exit status. 60 //! let exit_status = thread.join().unwrap(); 61 //! assert!(!exit_status.success()); 62 //! ``` 63 64 use std::io; 65 use std::process::{Child, ChildStderr, ChildStdin, ChildStdout, Command, ExitStatus}; 66 use std::sync::{Condvar, Mutex}; 67 68 mod sys; 69 70 // Publish the Unix-only SharedChildExt trait. 71 #[cfg(unix)] 72 pub mod unix; 73 74 #[derive(Debug)] 75 pub struct SharedChild { 76 // This lock provides shared access to kill() and wait(). We never hold it 77 // during a blocking wait, though, so that non-blocking waits and kills can 78 // go through. (Blocking waits use libc::waitid with the WNOWAIT flag.) 79 child: Mutex<Child>, 80 81 // When there are multiple waiting threads, one of them will actually wait 82 // on the child, and the rest will block on this condvar. 83 state_lock: Mutex<ChildState>, 84 state_condvar: Condvar, 85 } 86 87 impl SharedChild { 88 /// Spawn a new `SharedChild` from a 89 /// [`std::process::Command`](https://doc.rust-lang.org/std/process/struct.Command.html). spawn(command: &mut Command) -> io::Result<Self>90 pub fn spawn(command: &mut Command) -> io::Result<Self> { 91 let child = command.spawn()?; 92 Ok(Self { 93 child: Mutex::new(child), 94 state_lock: Mutex::new(NotWaiting), 95 state_condvar: Condvar::new(), 96 }) 97 } 98 99 /// Construct a new `SharedChild` from an already spawned 100 /// [`std::process::Child`](https://doc.rust-lang.org/std/process/struct.Child.html). 101 /// 102 /// This constructor needs to know whether `child` has already been waited on, and the only way 103 /// to find that out is to call `child.try_wait()` internally. If the child process is 104 /// currently a zombie, that call will clean it up as a side effect. The [`SharedChild::spawn`] 105 /// constructor doesn't need to do this. new(mut child: Child) -> io::Result<Self>106 pub fn new(mut child: Child) -> io::Result<Self> { 107 let state = match child.try_wait()? { 108 Some(status) => Exited(status), 109 None => NotWaiting, 110 }; 111 Ok(Self { 112 child: Mutex::new(child), 113 state_lock: Mutex::new(state), 114 state_condvar: Condvar::new(), 115 }) 116 } 117 118 /// Return the child process ID. id(&self) -> u32119 pub fn id(&self) -> u32 { 120 self.child.lock().unwrap().id() 121 } 122 get_handle(&self) -> sys::Handle123 fn get_handle(&self) -> sys::Handle { 124 sys::get_handle(&self.child.lock().unwrap()) 125 } 126 127 /// Wait for the child to exit, blocking the current thread, and return its 128 /// exit status. wait(&self) -> io::Result<ExitStatus>129 pub fn wait(&self) -> io::Result<ExitStatus> { 130 let mut state = self.state_lock.lock().unwrap(); 131 loop { 132 match *state { 133 NotWaiting => { 134 // Either no one is waiting on the child yet, or a previous 135 // waiter failed. That means we need to do it ourselves. 136 // Break out of this loop. 137 break; 138 } 139 Waiting => { 140 // Another thread is already waiting on the child. We'll 141 // block until it signal us on the condvar, then loop again. 142 // Spurious wakeups could bring us here multiple times 143 // though, see the Condvar docs. 144 state = self.state_condvar.wait(state).unwrap(); 145 } 146 Exited(exit_status) => return Ok(exit_status), 147 } 148 } 149 150 // If we get here, we have the state lock, and we're the thread 151 // responsible for waiting on the child. Set the state to Waiting and 152 // then release the state lock, so that other threads can observe it 153 // while we block. Afterwards we must leave the Waiting state before 154 // this function exits, or other waiters will deadlock. 155 *state = Waiting; 156 drop(state); 157 158 // Block until the child exits without reaping it. (On Unix, that means 159 // we need to call libc::waitid with the WNOWAIT flag. On Windows 160 // waiting never reaps.) That makes it safe for another thread to kill 161 // while we're here, without racing against some process reusing the 162 // child's PID. Having only one thread in this section is important, 163 // because POSIX doesn't guarantee much about what happens when multiple 164 // threads wait on a child at the same time: 165 // http://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_13 166 let noreap_result = sys::wait_without_reaping(self.get_handle()); 167 168 // Now either we hit an error, or the child has exited and needs to be 169 // reaped. Retake the state lock and handle all the different exit 170 // cases. No matter what happened/happens, we'll leave the Waiting state 171 // and signal the state condvar. 172 let mut state = self.state_lock.lock().unwrap(); 173 // The child has already exited, so this wait should clean up without blocking. 174 let final_result = noreap_result.and_then(|_| self.child.lock().unwrap().wait()); 175 *state = if let Ok(exit_status) = final_result { 176 Exited(exit_status) 177 } else { 178 NotWaiting 179 }; 180 self.state_condvar.notify_all(); 181 final_result 182 } 183 184 /// Return the child's exit status if it has already exited. If the child is 185 /// still running, return `Ok(None)`. try_wait(&self) -> io::Result<Option<ExitStatus>>186 pub fn try_wait(&self) -> io::Result<Option<ExitStatus>> { 187 let mut status = self.state_lock.lock().unwrap(); 188 189 // Unlike wait() above, we don't loop on the Condvar here. If the status 190 // is Waiting or Exited, we return immediately. However, if the status 191 // is NotWaiting, we'll do a non-blocking wait below, in case the child 192 // has already exited. 193 match *status { 194 NotWaiting => {} 195 Waiting => return Ok(None), 196 Exited(exit_status) => return Ok(Some(exit_status)), 197 }; 198 199 // No one is waiting on the child. Check to see if it's already exited. 200 // If it has, put ourselves in the Exited state. (There can't be any 201 // other waiters to signal, because the state was NotWaiting when we 202 // started, and we're still holding the status lock.) 203 if sys::try_wait_without_reaping(self.get_handle())? { 204 // The child has exited. Reap it. This should not block. 205 let exit_status = self.child.lock().unwrap().wait()?; 206 *status = Exited(exit_status); 207 Ok(Some(exit_status)) 208 } else { 209 Ok(None) 210 } 211 } 212 213 /// Send a kill signal to the child. On Unix this sends SIGKILL, and you 214 /// should call `wait` afterwards to avoid leaving a zombie. If the process 215 /// has already been waited on, this returns `Ok(())` and does nothing. kill(&self) -> io::Result<()>216 pub fn kill(&self) -> io::Result<()> { 217 let status = self.state_lock.lock().unwrap(); 218 if let Exited(_) = *status { 219 return Ok(()); 220 } 221 // The child is still running. Kill it. This assumes that the wait 222 // functions above will never hold the child lock during a blocking 223 // wait. 224 self.child.lock().unwrap().kill() 225 } 226 227 /// Consume the `SharedChild` and return the 228 /// [`std::process::Child`](https://doc.rust-lang.org/std/process/struct.Child.html) 229 /// it contains. 230 /// 231 /// We never reap the child process except by calling `wait` or `try_wait` 232 /// on it, so the child object's inner state is correct, even if it was 233 /// waited on while it was shared. into_inner(self) -> Child234 pub fn into_inner(self) -> Child { 235 self.child.into_inner().unwrap() 236 } 237 238 /// Take the child's 239 /// [`stdin`](https://doc.rust-lang.org/std/process/struct.Child.html#structfield.stdin) 240 /// handle, if any. 241 /// 242 /// This will only return `Some` the first time it's called, and then only if the `Command` 243 /// that created the child was configured with `.stdin(Stdio::piped())`. take_stdin(&self) -> Option<ChildStdin>244 pub fn take_stdin(&self) -> Option<ChildStdin> { 245 self.child.lock().unwrap().stdin.take() 246 } 247 248 /// Take the child's 249 /// [`stdout`](https://doc.rust-lang.org/std/process/struct.Child.html#structfield.stdout) 250 /// handle, if any. 251 /// 252 /// This will only return `Some` the first time it's called, and then only if the `Command` 253 /// that created the child was configured with `.stdout(Stdio::piped())`. take_stdout(&self) -> Option<ChildStdout>254 pub fn take_stdout(&self) -> Option<ChildStdout> { 255 self.child.lock().unwrap().stdout.take() 256 } 257 258 /// Take the child's 259 /// [`stderr`](https://doc.rust-lang.org/std/process/struct.Child.html#structfield.stderr) 260 /// handle, if any. 261 /// 262 /// This will only return `Some` the first time it's called, and then only if the `Command` 263 /// that created the child was configured with `.stderr(Stdio::piped())`. take_stderr(&self) -> Option<ChildStderr>264 pub fn take_stderr(&self) -> Option<ChildStderr> { 265 self.child.lock().unwrap().stderr.take() 266 } 267 } 268 269 #[derive(Debug)] 270 enum ChildState { 271 NotWaiting, 272 Waiting, 273 Exited(ExitStatus), 274 } 275 276 use crate::ChildState::*; 277 278 #[cfg(test)] 279 mod tests { 280 use super::*; 281 use std::error::Error; 282 use std::process::{Command, Stdio}; 283 use std::sync::Arc; 284 285 // Python isn't available on some Unix platforms, e.g. Android, so we need this instead. 286 #[cfg(unix)] true_cmd() -> Command287 pub fn true_cmd() -> Command { 288 Command::new("true") 289 } 290 291 #[cfg(not(unix))] true_cmd() -> Command292 pub fn true_cmd() -> Command { 293 let mut cmd = Command::new("python"); 294 cmd.arg("-c").arg(""); 295 cmd 296 } 297 298 // Python isn't available on some Unix platforms, e.g. Android, so we need this instead. 299 #[cfg(unix)] sleep_forever_cmd() -> Command300 pub fn sleep_forever_cmd() -> Command { 301 let mut cmd = Command::new("sleep"); 302 cmd.arg("1000000"); 303 cmd 304 } 305 306 #[cfg(not(unix))] sleep_forever_cmd() -> Command307 pub fn sleep_forever_cmd() -> Command { 308 let mut cmd = Command::new("python"); 309 cmd.arg("-c").arg("import time; time.sleep(1000000)"); 310 cmd 311 } 312 313 // Python isn't available on some Unix platforms, e.g. Android, so we need this instead. 314 #[cfg(unix)] cat_cmd() -> Command315 pub fn cat_cmd() -> Command { 316 Command::new("cat") 317 } 318 319 #[cfg(not(unix))] cat_cmd() -> Command320 pub fn cat_cmd() -> Command { 321 let mut cmd = Command::new("python"); 322 cmd.arg("-c").arg(""); 323 cmd 324 } 325 326 #[test] test_wait()327 fn test_wait() { 328 let child = SharedChild::spawn(&mut true_cmd()).unwrap(); 329 // Test the id() function while we're at it. 330 let id = child.id(); 331 assert!(id > 0); 332 let status = child.wait().unwrap(); 333 assert_eq!(status.code().unwrap(), 0); 334 } 335 336 #[test] test_kill()337 fn test_kill() { 338 let child = SharedChild::spawn(&mut sleep_forever_cmd()).unwrap(); 339 child.kill().unwrap(); 340 let status = child.wait().unwrap(); 341 assert!(!status.success()); 342 } 343 344 #[test] test_try_wait()345 fn test_try_wait() { 346 let child = SharedChild::spawn(&mut sleep_forever_cmd()).unwrap(); 347 let maybe_status = child.try_wait().unwrap(); 348 assert_eq!(maybe_status, None); 349 child.kill().unwrap(); 350 // The child will handle that signal asynchronously, so we check it 351 // repeatedly in a busy loop. 352 let mut maybe_status = None; 353 while let None = maybe_status { 354 maybe_status = child.try_wait().unwrap(); 355 } 356 assert!(maybe_status.is_some()); 357 assert!(!maybe_status.unwrap().success()); 358 } 359 360 #[test] test_many_waiters()361 fn test_many_waiters() { 362 let child = Arc::new(SharedChild::spawn(&mut sleep_forever_cmd()).unwrap()); 363 let mut threads = Vec::new(); 364 for _ in 0..10 { 365 let clone = child.clone(); 366 threads.push(std::thread::spawn(move || clone.wait())); 367 } 368 child.kill().unwrap(); 369 for thread in threads { 370 thread.join().unwrap().unwrap(); 371 } 372 } 373 374 #[test] test_waitid_after_exit_doesnt_hang()375 fn test_waitid_after_exit_doesnt_hang() { 376 // There are ominous reports (https://bugs.python.org/issue10812) of a 377 // broken waitid implementation on OSX, which might hang forever if it 378 // tries to wait on a child that's already exited. 379 let child = true_cmd().spawn().unwrap(); 380 sys::wait_without_reaping(sys::get_handle(&child)).unwrap(); 381 // At this point the child has definitely exited. Wait again to test 382 // that a second wait doesn't block. 383 sys::wait_without_reaping(sys::get_handle(&child)).unwrap(); 384 } 385 386 #[test] test_into_inner_before_wait()387 fn test_into_inner_before_wait() { 388 let shared_child = SharedChild::spawn(&mut sleep_forever_cmd()).unwrap(); 389 let mut child = shared_child.into_inner(); 390 child.kill().unwrap(); 391 child.wait().unwrap(); 392 } 393 394 #[test] test_into_inner_after_wait()395 fn test_into_inner_after_wait() { 396 // This makes sure the child's inner state is valid. If we used waitpid 397 // on the side, the inner child would try to wait again and cause an 398 // error. 399 let shared_child = SharedChild::spawn(&mut sleep_forever_cmd()).unwrap(); 400 shared_child.kill().unwrap(); 401 shared_child.wait().unwrap(); 402 let mut child = shared_child.into_inner(); 403 // The child has already been waited on, so kill should be an error. 404 let kill_err = child.kill().unwrap_err(); 405 if cfg!(windows) { 406 assert_eq!(std::io::ErrorKind::PermissionDenied, kill_err.kind()); 407 } else { 408 assert_eq!(std::io::ErrorKind::InvalidInput, kill_err.kind()); 409 } 410 // But wait should succeed. 411 child.wait().unwrap(); 412 } 413 414 #[test] test_new() -> Result<(), Box<dyn Error>>415 fn test_new() -> Result<(), Box<dyn Error>> { 416 // Spawn a short-lived child. 417 let mut command = cat_cmd(); 418 command.stdin(Stdio::piped()); 419 command.stdout(Stdio::null()); 420 let mut child = command.spawn()?; 421 let child_stdin = child.stdin.take().unwrap(); 422 423 // Construct a SharedChild from the Child, which has not yet been waited on. The child is 424 // blocked on stdin, so we know it hasn't yet exited. 425 let mut shared_child = SharedChild::new(child).unwrap(); 426 assert!(matches!( 427 *shared_child.state_lock.lock().unwrap(), 428 NotWaiting, 429 )); 430 431 // Now close the child's stdin. This will cause the child to exit. 432 drop(child_stdin); 433 434 // Construct more SharedChild objects from the same child, in a loop. Eventually one of 435 // them will notice that the child has exited. 436 loop { 437 shared_child = SharedChild::new(shared_child.into_inner())?; 438 if let Exited(status) = &*shared_child.state_lock.lock().unwrap() { 439 assert!(status.success()); 440 return Ok(()); 441 } 442 } 443 } 444 445 #[test] test_takes() -> Result<(), Box<dyn Error>>446 fn test_takes() -> Result<(), Box<dyn Error>> { 447 let mut command = true_cmd(); 448 command.stdin(Stdio::piped()); 449 command.stdout(Stdio::piped()); 450 command.stderr(Stdio::piped()); 451 let shared_child = SharedChild::spawn(&mut command)?; 452 453 assert!(shared_child.take_stdin().is_some()); 454 assert!(shared_child.take_stdout().is_some()); 455 assert!(shared_child.take_stderr().is_some()); 456 457 assert!(shared_child.take_stdin().is_none()); 458 assert!(shared_child.take_stdout().is_none()); 459 assert!(shared_child.take_stderr().is_none()); 460 461 shared_child.wait()?; 462 Ok(()) 463 } 464 } 465