1 //! A library for awaiting and killing child processes from multiple threads.
2 //!
3 //! - [Docs](https://docs.rs/shared_child)
4 //! - [Crate](https://crates.io/crates/shared_child)
5 //! - [Repo](https://github.com/oconnor663/shared_child.rs)
6 //!
7 //! The
8 //! [`std::process::Child`](https://doc.rust-lang.org/std/process/struct.Child.html)
9 //! type in the standard library provides
10 //! [`wait`](https://doc.rust-lang.org/std/process/struct.Child.html#method.wait)
11 //! and
12 //! [`kill`](https://doc.rust-lang.org/std/process/struct.Child.html#method.kill)
13 //! methods that take `&mut self`, making it impossible to kill a child process
14 //! while another thread is waiting on it. That design works around a race
15 //! condition in Unix's `waitpid` function, where a PID might get reused as soon
16 //! as the wait returns, so a signal sent around the same time could
17 //! accidentally get delivered to the wrong process.
18 //!
//! However, with the newer POSIX `waitid` function, we can wait on a child
20 //! without freeing its PID for reuse. That makes it safe to send signals
21 //! concurrently. Windows has actually always supported this, by preventing PID
22 //! reuse while there are still open handles to a child process. This library
23 //! wraps `std::process::Child` for concurrent use, backed by these APIs.
24 //!
25 //! Compatibility note: The `libc` crate doesn't currently support `waitid` on
26 //! NetBSD or OpenBSD, or on older versions of OSX. There [might also
27 //! be](https://bugs.python.org/msg167016) some version of OSX where the
28 //! `waitid` function exists but is broken. We can add a "best effort"
29 //! workaround using `waitpid` for these platforms as we run into them. Please
30 //! [file an issue](https://github.com/oconnor663/shared_child.rs/issues/new) if
31 //! you hit this.
32 //!
33 //! # Example
34 //!
35 //! ```rust
36 //! use shared_child::SharedChild;
37 //! use std::process::Command;
38 //! use std::sync::Arc;
39 //!
40 //! // Spawn a child that will just sleep for a long time,
41 //! // and put it in an Arc to share between threads.
42 //! let mut command = Command::new("python");
43 //! command.arg("-c").arg("import time; time.sleep(1000000000)");
44 //! let shared_child = SharedChild::spawn(&mut command).unwrap();
45 //! let child_arc = Arc::new(shared_child);
46 //!
47 //! // On another thread, wait on the child process.
48 //! let child_arc_clone = child_arc.clone();
49 //! let thread = std::thread::spawn(move || {
50 //!     child_arc_clone.wait().unwrap()
51 //! });
52 //!
53 //! // While the other thread is waiting, kill the child process.
54 //! // This wouldn't be possible with e.g. Arc<Mutex<Child>> from
55 //! // the standard library, because the waiting thread would be
56 //! // holding the mutex.
57 //! child_arc.kill().unwrap();
58 //!
59 //! // Join the waiting thread and get the exit status.
60 //! let exit_status = thread.join().unwrap();
61 //! assert!(!exit_status.success());
62 //! ```
63 
64 use std::io;
65 use std::process::{Child, ChildStderr, ChildStdin, ChildStdout, Command, ExitStatus};
66 use std::sync::{Condvar, Mutex};
67 
68 mod sys;
69 
70 // Publish the Unix-only SharedChildExt trait.
71 #[cfg(unix)]
72 pub mod unix;
73 
/// A wrapper around [`std::process::Child`] whose `wait`, `try_wait`, and `kill` methods take
/// `&self`, so that one thread can kill the child while another thread is blocked waiting on it.
#[derive(Debug)]
pub struct SharedChild {
    // This lock provides shared access to kill() and wait(). We never hold it
    // during a blocking wait, though, so that non-blocking waits and kills can
    // go through. (Blocking waits use libc::waitid with the WNOWAIT flag.)
    child: Mutex<Child>,

    // When there are multiple waiting threads, one of them will actually wait
    // on the child, and the rest will block on this condvar.
    state_lock: Mutex<ChildState>,
    state_condvar: Condvar,
}
86 
87 impl SharedChild {
88     /// Spawn a new `SharedChild` from a
89     /// [`std::process::Command`](https://doc.rust-lang.org/std/process/struct.Command.html).
spawn(command: &mut Command) -> io::Result<Self>90     pub fn spawn(command: &mut Command) -> io::Result<Self> {
91         let child = command.spawn()?;
92         Ok(Self {
93             child: Mutex::new(child),
94             state_lock: Mutex::new(NotWaiting),
95             state_condvar: Condvar::new(),
96         })
97     }
98 
99     /// Construct a new `SharedChild` from an already spawned
100     /// [`std::process::Child`](https://doc.rust-lang.org/std/process/struct.Child.html).
101     ///
102     /// This constructor needs to know whether `child` has already been waited on, and the only way
103     /// to find that out is to call `child.try_wait()` internally. If the child process is
104     /// currently a zombie, that call will clean it up as a side effect. The [`SharedChild::spawn`]
105     /// constructor doesn't need to do this.
new(mut child: Child) -> io::Result<Self>106     pub fn new(mut child: Child) -> io::Result<Self> {
107         let state = match child.try_wait()? {
108             Some(status) => Exited(status),
109             None => NotWaiting,
110         };
111         Ok(Self {
112             child: Mutex::new(child),
113             state_lock: Mutex::new(state),
114             state_condvar: Condvar::new(),
115         })
116     }
117 
118     /// Return the child process ID.
id(&self) -> u32119     pub fn id(&self) -> u32 {
120         self.child.lock().unwrap().id()
121     }
122 
get_handle(&self) -> sys::Handle123     fn get_handle(&self) -> sys::Handle {
124         sys::get_handle(&self.child.lock().unwrap())
125     }
126 
    /// Wait for the child to exit, blocking the current thread, and return its
    /// exit status.
    ///
    /// Any number of threads may call this at the same time. One of them
    /// performs the blocking wait, and the rest block on a condition variable
    /// until that thread finishes.
    ///
    /// # Errors
    ///
    /// Returns the error from the non-reaping wait or from the final reaping
    /// wait. In that case the state goes back to `NotWaiting`, so a later call
    /// (or another blocked waiter) will try again.
    ///
    /// # Panics
    ///
    /// Panics if one of the internal mutexes is poisoned.
    pub fn wait(&self) -> io::Result<ExitStatus> {
        let mut state = self.state_lock.lock().unwrap();
        loop {
            match *state {
                NotWaiting => {
                    // Either no one is waiting on the child yet, or a previous
                    // waiter failed. That means we need to do it ourselves.
                    // Break out of this loop.
                    break;
                }
                Waiting => {
                    // Another thread is already waiting on the child. We'll
                    // block until it signals us on the condvar, then loop again.
                    // Spurious wakeups could bring us here multiple times
                    // though, see the Condvar docs.
                    state = self.state_condvar.wait(state).unwrap();
                }
                // The child was already reaped; hand back the cached status.
                Exited(exit_status) => return Ok(exit_status),
            }
        }

        // If we get here, we have the state lock, and we're the thread
        // responsible for waiting on the child. Set the state to Waiting and
        // then release the state lock, so that other threads can observe it
        // while we block. Afterwards we must leave the Waiting state before
        // this function exits, or other waiters will deadlock.
        *state = Waiting;
        drop(state);

        // Block until the child exits without reaping it. (On Unix, that means
        // we need to call libc::waitid with the WNOWAIT flag. On Windows
        // waiting never reaps.) That makes it safe for another thread to kill
        // while we're here, without racing against some process reusing the
        // child's PID. Having only one thread in this section is important,
        // because POSIX doesn't guarantee much about what happens when multiple
        // threads wait on a child at the same time:
        // http://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_13
        let noreap_result = sys::wait_without_reaping(self.get_handle());

        // Now either we hit an error, or the child has exited and needs to be
        // reaped. Retake the state lock and handle all the different exit
        // cases. No matter what happened/happens, we'll leave the Waiting state
        // and signal the state condvar.
        let mut state = self.state_lock.lock().unwrap();
        // The child has already exited, so this wait should clean up without blocking.
        let final_result = noreap_result.and_then(|_| self.child.lock().unwrap().wait());
        // On success, cache the status for every later caller. On failure, fall
        // back to NotWaiting so that some other thread can retry the wait.
        *state = if let Ok(exit_status) = final_result {
            Exited(exit_status)
        } else {
            NotWaiting
        };
        // Wake every thread parked in the Waiting arm above so it can re-check the state.
        self.state_condvar.notify_all();
        final_result
    }
183 
184     /// Return the child's exit status if it has already exited. If the child is
185     /// still running, return `Ok(None)`.
try_wait(&self) -> io::Result<Option<ExitStatus>>186     pub fn try_wait(&self) -> io::Result<Option<ExitStatus>> {
187         let mut status = self.state_lock.lock().unwrap();
188 
189         // Unlike wait() above, we don't loop on the Condvar here. If the status
190         // is Waiting or Exited, we return immediately. However, if the status
191         // is NotWaiting, we'll do a non-blocking wait below, in case the child
192         // has already exited.
193         match *status {
194             NotWaiting => {}
195             Waiting => return Ok(None),
196             Exited(exit_status) => return Ok(Some(exit_status)),
197         };
198 
199         // No one is waiting on the child. Check to see if it's already exited.
200         // If it has, put ourselves in the Exited state. (There can't be any
201         // other waiters to signal, because the state was NotWaiting when we
202         // started, and we're still holding the status lock.)
203         if sys::try_wait_without_reaping(self.get_handle())? {
204             // The child has exited. Reap it. This should not block.
205             let exit_status = self.child.lock().unwrap().wait()?;
206             *status = Exited(exit_status);
207             Ok(Some(exit_status))
208         } else {
209             Ok(None)
210         }
211     }
212 
213     /// Send a kill signal to the child. On Unix this sends SIGKILL, and you
214     /// should call `wait` afterwards to avoid leaving a zombie. If the process
215     /// has already been waited on, this returns `Ok(())` and does nothing.
kill(&self) -> io::Result<()>216     pub fn kill(&self) -> io::Result<()> {
217         let status = self.state_lock.lock().unwrap();
218         if let Exited(_) = *status {
219             return Ok(());
220         }
221         // The child is still running. Kill it. This assumes that the wait
222         // functions above will never hold the child lock during a blocking
223         // wait.
224         self.child.lock().unwrap().kill()
225     }
226 
227     /// Consume the `SharedChild` and return the
228     /// [`std::process::Child`](https://doc.rust-lang.org/std/process/struct.Child.html)
229     /// it contains.
230     ///
231     /// We never reap the child process except by calling `wait` or `try_wait`
232     /// on it, so the child object's inner state is correct, even if it was
233     /// waited on while it was shared.
into_inner(self) -> Child234     pub fn into_inner(self) -> Child {
235         self.child.into_inner().unwrap()
236     }
237 
238     /// Take the child's
239     /// [`stdin`](https://doc.rust-lang.org/std/process/struct.Child.html#structfield.stdin)
240     /// handle, if any.
241     ///
242     /// This will only return `Some` the first time it's called, and then only if the `Command`
243     /// that created the child was configured with `.stdin(Stdio::piped())`.
take_stdin(&self) -> Option<ChildStdin>244     pub fn take_stdin(&self) -> Option<ChildStdin> {
245         self.child.lock().unwrap().stdin.take()
246     }
247 
248     /// Take the child's
249     /// [`stdout`](https://doc.rust-lang.org/std/process/struct.Child.html#structfield.stdout)
250     /// handle, if any.
251     ///
252     /// This will only return `Some` the first time it's called, and then only if the `Command`
253     /// that created the child was configured with `.stdout(Stdio::piped())`.
take_stdout(&self) -> Option<ChildStdout>254     pub fn take_stdout(&self) -> Option<ChildStdout> {
255         self.child.lock().unwrap().stdout.take()
256     }
257 
258     /// Take the child's
259     /// [`stderr`](https://doc.rust-lang.org/std/process/struct.Child.html#structfield.stderr)
260     /// handle, if any.
261     ///
262     /// This will only return `Some` the first time it's called, and then only if the `Command`
263     /// that created the child was configured with `.stderr(Stdio::piped())`.
take_stderr(&self) -> Option<ChildStderr>264     pub fn take_stderr(&self) -> Option<ChildStderr> {
265         self.child.lock().unwrap().stderr.take()
266     }
267 }
268 
/// Tracks which thread, if any, is responsible for waiting on the child. Stored behind
/// `SharedChild::state_lock`.
#[derive(Debug)]
enum ChildState {
    /// No thread is in a blocking wait and no exit status has been recorded. This is also the
    /// state a waiter falls back to when its wait fails.
    NotWaiting,
    /// One thread is currently in a blocking wait on the child. Other callers of `wait` block
    /// on the condvar, and `try_wait` reports the child as still running.
    Waiting,
    /// The child has been reaped, and this is its exit status.
    Exited(ExitStatus),
}
275 
276 use crate::ChildState::*;
277 
278 #[cfg(test)]
279 mod tests {
280     use super::*;
281     use std::error::Error;
282     use std::process::{Command, Stdio};
283     use std::sync::Arc;
284 
285     // Python isn't available on some Unix platforms, e.g. Android, so we need this instead.
286     #[cfg(unix)]
true_cmd() -> Command287     pub fn true_cmd() -> Command {
288         Command::new("true")
289     }
290 
291     #[cfg(not(unix))]
true_cmd() -> Command292     pub fn true_cmd() -> Command {
293         let mut cmd = Command::new("python");
294         cmd.arg("-c").arg("");
295         cmd
296     }
297 
298     // Python isn't available on some Unix platforms, e.g. Android, so we need this instead.
299     #[cfg(unix)]
sleep_forever_cmd() -> Command300     pub fn sleep_forever_cmd() -> Command {
301         let mut cmd = Command::new("sleep");
302         cmd.arg("1000000");
303         cmd
304     }
305 
306     #[cfg(not(unix))]
sleep_forever_cmd() -> Command307     pub fn sleep_forever_cmd() -> Command {
308         let mut cmd = Command::new("python");
309         cmd.arg("-c").arg("import time; time.sleep(1000000)");
310         cmd
311     }
312 
313     // Python isn't available on some Unix platforms, e.g. Android, so we need this instead.
314     #[cfg(unix)]
cat_cmd() -> Command315     pub fn cat_cmd() -> Command {
316         Command::new("cat")
317     }
318 
319     #[cfg(not(unix))]
cat_cmd() -> Command320     pub fn cat_cmd() -> Command {
321         let mut cmd = Command::new("python");
322         cmd.arg("-c").arg("");
323         cmd
324     }
325 
326     #[test]
test_wait()327     fn test_wait() {
328         let child = SharedChild::spawn(&mut true_cmd()).unwrap();
329         // Test the id() function while we're at it.
330         let id = child.id();
331         assert!(id > 0);
332         let status = child.wait().unwrap();
333         assert_eq!(status.code().unwrap(), 0);
334     }
335 
336     #[test]
test_kill()337     fn test_kill() {
338         let child = SharedChild::spawn(&mut sleep_forever_cmd()).unwrap();
339         child.kill().unwrap();
340         let status = child.wait().unwrap();
341         assert!(!status.success());
342     }
343 
344     #[test]
test_try_wait()345     fn test_try_wait() {
346         let child = SharedChild::spawn(&mut sleep_forever_cmd()).unwrap();
347         let maybe_status = child.try_wait().unwrap();
348         assert_eq!(maybe_status, None);
349         child.kill().unwrap();
350         // The child will handle that signal asynchronously, so we check it
351         // repeatedly in a busy loop.
352         let mut maybe_status = None;
353         while let None = maybe_status {
354             maybe_status = child.try_wait().unwrap();
355         }
356         assert!(maybe_status.is_some());
357         assert!(!maybe_status.unwrap().success());
358     }
359 
360     #[test]
test_many_waiters()361     fn test_many_waiters() {
362         let child = Arc::new(SharedChild::spawn(&mut sleep_forever_cmd()).unwrap());
363         let mut threads = Vec::new();
364         for _ in 0..10 {
365             let clone = child.clone();
366             threads.push(std::thread::spawn(move || clone.wait()));
367         }
368         child.kill().unwrap();
369         for thread in threads {
370             thread.join().unwrap().unwrap();
371         }
372     }
373 
374     #[test]
test_waitid_after_exit_doesnt_hang()375     fn test_waitid_after_exit_doesnt_hang() {
376         // There are ominous reports (https://bugs.python.org/issue10812) of a
377         // broken waitid implementation on OSX, which might hang forever if it
378         // tries to wait on a child that's already exited.
379         let child = true_cmd().spawn().unwrap();
380         sys::wait_without_reaping(sys::get_handle(&child)).unwrap();
381         // At this point the child has definitely exited. Wait again to test
382         // that a second wait doesn't block.
383         sys::wait_without_reaping(sys::get_handle(&child)).unwrap();
384     }
385 
386     #[test]
test_into_inner_before_wait()387     fn test_into_inner_before_wait() {
388         let shared_child = SharedChild::spawn(&mut sleep_forever_cmd()).unwrap();
389         let mut child = shared_child.into_inner();
390         child.kill().unwrap();
391         child.wait().unwrap();
392     }
393 
394     #[test]
test_into_inner_after_wait()395     fn test_into_inner_after_wait() {
396         // This makes sure the child's inner state is valid. If we used waitpid
397         // on the side, the inner child would try to wait again and cause an
398         // error.
399         let shared_child = SharedChild::spawn(&mut sleep_forever_cmd()).unwrap();
400         shared_child.kill().unwrap();
401         shared_child.wait().unwrap();
402         let mut child = shared_child.into_inner();
403         // The child has already been waited on, so kill should be an error.
404         let kill_err = child.kill().unwrap_err();
405         if cfg!(windows) {
406             assert_eq!(std::io::ErrorKind::PermissionDenied, kill_err.kind());
407         } else {
408             assert_eq!(std::io::ErrorKind::InvalidInput, kill_err.kind());
409         }
410         // But wait should succeed.
411         child.wait().unwrap();
412     }
413 
414     #[test]
test_new() -> Result<(), Box<dyn Error>>415     fn test_new() -> Result<(), Box<dyn Error>> {
416         // Spawn a short-lived child.
417         let mut command = cat_cmd();
418         command.stdin(Stdio::piped());
419         command.stdout(Stdio::null());
420         let mut child = command.spawn()?;
421         let child_stdin = child.stdin.take().unwrap();
422 
423         // Construct a SharedChild from the Child, which has not yet been waited on. The child is
424         // blocked on stdin, so we know it hasn't yet exited.
425         let mut shared_child = SharedChild::new(child).unwrap();
426         assert!(matches!(
427             *shared_child.state_lock.lock().unwrap(),
428             NotWaiting,
429         ));
430 
431         // Now close the child's stdin. This will cause the child to exit.
432         drop(child_stdin);
433 
434         // Construct more SharedChild objects from the same child, in a loop. Eventually one of
435         // them will notice that the child has exited.
436         loop {
437             shared_child = SharedChild::new(shared_child.into_inner())?;
438             if let Exited(status) = &*shared_child.state_lock.lock().unwrap() {
439                 assert!(status.success());
440                 return Ok(());
441             }
442         }
443     }
444 
445     #[test]
test_takes() -> Result<(), Box<dyn Error>>446     fn test_takes() -> Result<(), Box<dyn Error>> {
447         let mut command = true_cmd();
448         command.stdin(Stdio::piped());
449         command.stdout(Stdio::piped());
450         command.stderr(Stdio::piped());
451         let shared_child = SharedChild::spawn(&mut command)?;
452 
453         assert!(shared_child.take_stdin().is_some());
454         assert!(shared_child.take_stdout().is_some());
455         assert!(shared_child.take_stderr().is_some());
456 
457         assert!(shared_child.take_stdin().is_none());
458         assert!(shared_child.take_stdout().is_none());
459         assert!(shared_child.take_stderr().is_none());
460 
461         shared_child.wait()?;
462         Ok(())
463     }
464 }
465