xref: /aosp_15_r20/external/crosvm/base/src/sys/linux/poll.rs (revision bb4ee6a4ae7042d18b07a98463b9c8b875e44b39)
1 // Copyright 2017 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 use std::cmp::min;
6 use std::fs::File;
7 use std::marker::PhantomData;
8 use std::mem::MaybeUninit;
9 use std::ptr::null_mut;
10 use std::time::Duration;
11 
12 use libc::c_int;
13 use libc::epoll_create1;
14 use libc::epoll_ctl;
15 use libc::epoll_event;
16 use libc::epoll_wait;
17 use libc::ENOENT;
18 use libc::EPOLLHUP;
19 use libc::EPOLLIN;
20 use libc::EPOLLOUT;
21 use libc::EPOLLRDHUP;
22 use libc::EPOLL_CLOEXEC;
23 use libc::EPOLL_CTL_ADD;
24 use libc::EPOLL_CTL_DEL;
25 use libc::EPOLL_CTL_MOD;
26 use smallvec::SmallVec;
27 
28 use super::errno_result;
29 use super::Result;
30 use crate::handle_eintr_errno;
31 use crate::AsRawDescriptor;
32 use crate::EventToken;
33 use crate::EventType;
34 use crate::FromRawDescriptor;
35 use crate::RawDescriptor;
36 use crate::TriggeredEvent;
37 
/// Number of `epoll_event` slots handed to a single `epoll_wait` call by
/// `EventContext::wait_timeout`, and therefore the most events one wait can report.
const EVENT_CONTEXT_MAX_EVENTS: usize = 16;
39 
40 impl From<EventType> for u32 {
from(et: EventType) -> u3241     fn from(et: EventType) -> u32 {
42         let v = match et {
43             EventType::None => 0,
44             EventType::Read => EPOLLIN,
45             EventType::Write => EPOLLOUT,
46             EventType::ReadWrite => EPOLLIN | EPOLLOUT,
47         };
48         v as u32
49     }
50 }
51 
/// Polls a set of objects backed by file descriptors, reporting each triggered event together
/// with the token of type `T` that was registered for its descriptor.
///
/// See [`crate::WaitContext`] for an example that uses the cross-platform wrapper.
pub struct EventContext<T> {
    // File wrapping the epoll descriptor that every registration and wait goes through.
    epoll_ctx: File,
    // No `T` is stored here (tokens travel through the kernel as raw `u64` values), so a marker
    // is needed to satisfy usage of T.
    tokens: PhantomData<[T]>,
}
60 
61 impl<T: EventToken> EventContext<T> {
62     /// Creates a new `EventContext`.
new() -> Result<EventContext<T>>63     pub fn new() -> Result<EventContext<T>> {
64         // SAFETY:
65         // Safe because we check the return value.
66         let epoll_fd = unsafe { epoll_create1(EPOLL_CLOEXEC) };
67         if epoll_fd < 0 {
68             return errno_result();
69         }
70         Ok(EventContext {
71             // SAFETY:
72             // Safe because epoll_fd is valid.
73             epoll_ctx: unsafe { File::from_raw_descriptor(epoll_fd) },
74             tokens: PhantomData,
75         })
76     }
77 
78     /// Creates a new `EventContext` and adds the slice of `fd` and `token` tuples to the new
79     /// context.
80     ///
81     /// This is equivalent to calling `new` followed by `add_many`. If there is an error, this will
82     /// return the error instead of the new context.
build_with(fd_tokens: &[(&dyn AsRawDescriptor, T)]) -> Result<EventContext<T>>83     pub fn build_with(fd_tokens: &[(&dyn AsRawDescriptor, T)]) -> Result<EventContext<T>> {
84         let ctx = EventContext::new()?;
85         ctx.add_many(fd_tokens)?;
86         Ok(ctx)
87     }
88 
89     /// Adds the given slice of `fd` and `token` tuples to this context.
90     ///
91     /// This is equivalent to calling `add` with each `fd` and `token`. If there are any errors,
92     /// this method will stop adding `fd`s and return the first error, leaving this context in a
93     /// undefined state.
add_many(&self, fd_tokens: &[(&dyn AsRawDescriptor, T)]) -> Result<()>94     pub fn add_many(&self, fd_tokens: &[(&dyn AsRawDescriptor, T)]) -> Result<()> {
95         for (fd, token) in fd_tokens {
96             self.add(*fd, T::from_raw_token(token.as_raw_token()))?;
97         }
98         Ok(())
99     }
100 
101     /// Adds the given `fd` to this context and associates the given `token` with the `fd`'s
102     /// readable events.
103     ///
104     /// A `fd` can only be added once and does not need to be kept open. If the `fd` is dropped and
105     /// there were no duplicated file descriptors (i.e. adding the same descriptor with a different
106     /// FD number) added to this context, events will not be reported by `wait` anymore.
add(&self, fd: &dyn AsRawDescriptor, token: T) -> Result<()>107     pub fn add(&self, fd: &dyn AsRawDescriptor, token: T) -> Result<()> {
108         self.add_for_event(fd, EventType::Read, token)
109     }
110 
111     /// Adds the given `descriptor` to this context, watching for the specified events and
112     /// associates the given 'token' with those events.
113     ///
114     /// A `descriptor` can only be added once and does not need to be kept open. If the `descriptor`
115     /// is dropped and there were no duplicated file descriptors (i.e. adding the same descriptor
116     /// with a different FD number) added to this context, events will not be reported by `wait`
117     /// anymore.
add_for_event( &self, descriptor: &dyn AsRawDescriptor, event_type: EventType, token: T, ) -> Result<()>118     pub fn add_for_event(
119         &self,
120         descriptor: &dyn AsRawDescriptor,
121         event_type: EventType,
122         token: T,
123     ) -> Result<()> {
124         let mut evt = epoll_event {
125             events: event_type.into(),
126             u64: token.as_raw_token(),
127         };
128         // SAFETY:
129         // Safe because we give a valid epoll FD and FD to watch, as well as a valid epoll_event
130         // structure. Then we check the return value.
131         let ret = unsafe {
132             epoll_ctl(
133                 self.epoll_ctx.as_raw_descriptor(),
134                 EPOLL_CTL_ADD,
135                 descriptor.as_raw_descriptor(),
136                 &mut evt,
137             )
138         };
139         if ret < 0 {
140             return errno_result();
141         };
142         Ok(())
143     }
144 
145     /// If `fd` was previously added to this context, the watched events will be replaced with
146     /// `event_type` and the token associated with it will be replaced with the given `token`.
modify(&self, fd: &dyn AsRawDescriptor, event_type: EventType, token: T) -> Result<()>147     pub fn modify(&self, fd: &dyn AsRawDescriptor, event_type: EventType, token: T) -> Result<()> {
148         let mut evt = epoll_event {
149             events: event_type.into(),
150             u64: token.as_raw_token(),
151         };
152         // SAFETY:
153         // Safe because we give a valid epoll FD and FD to modify, as well as a valid epoll_event
154         // structure. Then we check the return value.
155         let ret = unsafe {
156             epoll_ctl(
157                 self.epoll_ctx.as_raw_descriptor(),
158                 EPOLL_CTL_MOD,
159                 fd.as_raw_descriptor(),
160                 &mut evt,
161             )
162         };
163         if ret < 0 {
164             return errno_result();
165         };
166         Ok(())
167     }
168 
169     /// Deletes the given `fd` from this context. If the `fd` is not being polled by this context,
170     /// the call is silently dropped without errors.
171     ///
172     /// If an `fd`'s token shows up in the list of hangup events, it should be removed using this
173     /// method or by closing/dropping (if and only if the fd was never dup()'d/fork()'d) the `fd`.
174     /// Failure to do so will cause the `wait` method to always return immediately, causing ~100%
175     /// CPU load.
delete(&self, fd: &dyn AsRawDescriptor) -> Result<()>176     pub fn delete(&self, fd: &dyn AsRawDescriptor) -> Result<()> {
177         // SAFETY:
178         // Safe because we give a valid epoll FD and FD to stop watching. Then we check the return
179         // value.
180         let ret = unsafe {
181             epoll_ctl(
182                 self.epoll_ctx.as_raw_descriptor(),
183                 EPOLL_CTL_DEL,
184                 fd.as_raw_descriptor(),
185                 null_mut(),
186             )
187         };
188         // If epoll_ctl returns ENOENT it means the fd is not part of the current polling set so
189         // there is nothing to delete.
190         if ret < 0 && ret != ENOENT {
191             return errno_result();
192         };
193         Ok(())
194     }
195 
196     /// Waits for any events to occur in FDs that were previously added to this context.
197     ///
198     /// The events are level-triggered, meaning that if any events are unhandled (i.e. not reading
199     /// for readable events and not closing for hungup events), subsequent calls to `wait` will
200     /// return immediately. The consequence of not handling an event perpetually while calling
201     /// `wait` is that the callers loop will degenerated to busy loop polling, pinning a CPU to
202     /// ~100% usage.
wait(&self) -> Result<SmallVec<[TriggeredEvent<T>; 16]>>203     pub fn wait(&self) -> Result<SmallVec<[TriggeredEvent<T>; 16]>> {
204         self.wait_timeout(Duration::new(i64::MAX as u64, 0))
205     }
206 
207     /// Like `wait` except will only block for a maximum of the given `timeout`.
208     ///
209     /// This may return earlier than `timeout` with zero events if the duration indicated exceeds
210     /// system limits.
wait_timeout(&self, timeout: Duration) -> Result<SmallVec<[TriggeredEvent<T>; 16]>>211     pub fn wait_timeout(&self, timeout: Duration) -> Result<SmallVec<[TriggeredEvent<T>; 16]>> {
212         let mut epoll_events: [MaybeUninit<epoll_event>; EVENT_CONTEXT_MAX_EVENTS] =
213             // SAFETY:
214             // `MaybeUnint<T>` has the same layout as plain `T` (`epoll_event` in our case).
215             // We submit an uninitialized array to the `epoll_wait` system call, which returns how many
216             // elements it initialized, and then we convert only the initialized `MaybeUnint` values
217             // into `epoll_event` structures after the call.
218             unsafe { MaybeUninit::uninit().assume_init() };
219 
220         let timeout_millis = if timeout.as_secs() as i64 == i64::MAX {
221             // We make the convenient assumption that 2^63 seconds is an effectively unbounded time
222             // frame. This is meant to mesh with `wait` calling us with no timeout.
223             -1
224         } else {
225             // In cases where we the number of milliseconds would overflow an i32, we substitute the
226             // maximum timeout which is ~24.8 days.
227             let millis = timeout
228                 .as_secs()
229                 .checked_mul(1_000)
230                 .and_then(|ms| ms.checked_add(u64::from(timeout.subsec_nanos()) / 1_000_000))
231                 .unwrap_or(i32::MAX as u64);
232             min(i32::MAX as u64, millis) as i32
233         };
234         let ret = {
235             let max_events = epoll_events.len() as c_int;
236             // SAFETY:
237             // Safe because we give an epoll context and a properly sized epoll_events array
238             // pointer, which we trust the kernel to fill in properly. The `transmute` is safe,
239             // since `MaybeUnint<T>` has the same layout as `T`, and the `epoll_wait` syscall will
240             // initialize as many elements of the `epoll_events` array as it returns.
241             unsafe {
242                 handle_eintr_errno!(epoll_wait(
243                     self.epoll_ctx.as_raw_descriptor(),
244                     std::mem::transmute(&mut epoll_events[0]),
245                     max_events,
246                     timeout_millis
247                 ))
248             }
249         };
250         if ret < 0 {
251             return errno_result();
252         }
253         let count = ret as usize;
254 
255         let events = epoll_events[0..count]
256             .iter()
257             .map(|e| {
258                 // SAFETY:
259                 // Converting `MaybeUninit<epoll_event>` into `epoll_event` is safe here, since we
260                 // are only iterating over elements that the `epoll_wait` system call initialized.
261                 let e = unsafe { e.assume_init() };
262                 TriggeredEvent {
263                     token: T::from_raw_token(e.u64),
264                     is_readable: e.events & (EPOLLIN as u32) != 0,
265                     is_writable: e.events & (EPOLLOUT as u32) != 0,
266                     is_hungup: e.events & ((EPOLLHUP | EPOLLRDHUP) as u32) != 0,
267                 }
268             })
269             .collect();
270         Ok(events)
271     }
272 }
273 
274 impl<T: EventToken> AsRawDescriptor for EventContext<T> {
as_raw_descriptor(&self) -> RawDescriptor275     fn as_raw_descriptor(&self) -> RawDescriptor {
276         self.epoll_ctx.as_raw_descriptor()
277     }
278 }
279 
#[cfg(test)]
mod tests {
    use std::time::Instant;

    use base_event_token_derive::EventToken;

    use super::*;
    use crate::Event;

    /// Two pre-signaled events are each reported as readable, then consumed and removed.
    #[test]
    fn event_context() {
        let evt1 = Event::new().unwrap();
        let evt2 = Event::new().unwrap();
        evt1.signal().unwrap();
        evt2.signal().unwrap();
        let ctx: EventContext<u32> = EventContext::build_with(&[(&evt1, 1), (&evt2, 2)]).unwrap();

        let mut handled = 0;
        while handled < 2 {
            let triggered = ctx.wait().unwrap();
            for event in triggered.iter().filter(|e| e.is_readable) {
                handled += 1;
                // Map the token back to the event it was registered with.
                let source = match event.token {
                    1 => &evt1,
                    2 => &evt2,
                    _ => panic!("unexpected token"),
                };
                source.wait().unwrap();
                ctx.delete(source).unwrap();
            }
        }
        assert_eq!(handled, 2);
    }

    /// More events are pending than a single wait can return; repeated waits must drain them all.
    #[test]
    fn event_context_overflow() {
        const EVT_COUNT: usize = EVENT_CONTEXT_MAX_EVENTS * 2 + 1;
        let ctx: EventContext<usize> = EventContext::new().unwrap();
        // Each event is signaled and registered with its index as the token.
        let evts: Vec<Event> = (0..EVT_COUNT)
            .map(|index| {
                let evt = Event::new().unwrap();
                evt.signal().unwrap();
                ctx.add(&evt, index).unwrap();
                evt
            })
            .collect();

        let mut drained = 0;
        while drained < EVT_COUNT {
            let triggered = ctx.wait().unwrap();
            for event in triggered.iter().filter(|e| e.is_readable) {
                evts[event.token].wait().unwrap();
                drained += 1;
            }
        }
    }

    /// Waiting on an empty context must block for at least the requested timeout.
    #[test]
    fn event_context_timeout() {
        let ctx: EventContext<u32> = EventContext::new().unwrap();
        let timeout = Duration::from_millis(10);
        let started = Instant::now();
        ctx.wait_timeout(timeout).unwrap();
        assert!(started.elapsed() >= timeout);
    }

    /// Every shape of derived token variant survives a round trip through its raw form.
    #[test]
    // `EmptyToken` is only declared to check that the derive accepts it; it is never used.
    #[allow(dead_code)]
    fn event_token_derive() {
        #[derive(EventToken)]
        enum EmptyToken {}

        #[derive(PartialEq, Debug, EventToken)]
        enum Token {
            Alpha,
            Beta,
            // comments
            Gamma(u32),
            Delta { index: usize },
            Omega,
        }

        // Encode a token to its raw value and decode it again.
        let round_trip = |token: Token| Token::from_raw_token(token.as_raw_token());

        assert_eq!(round_trip(Token::Alpha), Token::Alpha);
        assert_eq!(round_trip(Token::Beta), Token::Beta);
        assert_eq!(round_trip(Token::Gamma(55)), Token::Gamma(55));
        assert_eq!(
            round_trip(Token::Delta { index: 100 }),
            Token::Delta { index: 100 }
        );
        assert_eq!(round_trip(Token::Omega), Token::Omega);
    }
}
384