1 // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 // SPDX-License-Identifier: BSD-3-Clause
3 
4 //! Safe wrappers over the
5 //! [`epoll`](http://man7.org/linux/man-pages/man7/epoll.7.html) API.
6 
7 use std::io;
8 use std::ops::{Deref, Drop};
9 use std::os::unix::io::{AsRawFd, RawFd};
10 
11 #[cfg(any(target_os = "linux", target_os = "android"))]
12 use bitflags::bitflags;
13 use libc::{
14     epoll_create1, epoll_ctl, epoll_event, epoll_wait, EPOLLERR, EPOLLET, EPOLLEXCLUSIVE, EPOLLHUP,
15     EPOLLIN, EPOLLONESHOT, EPOLLOUT, EPOLLPRI, EPOLLRDHUP, EPOLLWAKEUP, EPOLL_CLOEXEC,
16     EPOLL_CTL_ADD, EPOLL_CTL_DEL, EPOLL_CTL_MOD,
17 };
18 
19 use crate::syscall::SyscallReturnCode;
20 
21 /// Wrapper over `EPOLL_CTL_*` operations that can be performed on a file descriptor.
22 #[derive(Debug)]
23 #[repr(i32)]
24 pub enum ControlOperation {
25     /// Add a file descriptor to the interest list.
26     Add = EPOLL_CTL_ADD,
27     /// Change the settings associated with a file descriptor that is
28     /// already in the interest list.
29     Modify = EPOLL_CTL_MOD,
30     /// Remove a file descriptor from the interest list.
31     Delete = EPOLL_CTL_DEL,
32 }
33 
bitflags! {
    /// The type of events we can monitor a file descriptor for.
    ///
    /// Every flag carries the bit value of the `libc` `EPOLL*` constant it is named after,
    /// cast to `u32`, so `bits()` can be stored directly in `epoll_event.events`.
    pub struct EventSet: u32 {
        /// The associated file descriptor is available for read operations (`EPOLLIN`).
        const IN = EPOLLIN as u32;
        /// The associated file descriptor is available for write operations (`EPOLLOUT`).
        const OUT = EPOLLOUT as u32;
        /// An error condition happened on the associated file descriptor (`EPOLLERR`).
        const ERROR = EPOLLERR as u32;
        /// This can be used to detect peer shutdown when using Edge Triggered monitoring
        /// (`EPOLLRDHUP`).
        const READ_HANG_UP = EPOLLRDHUP as u32;
        /// Sets the Edge Triggered behavior for the associated file descriptor (`EPOLLET`).
        /// The default behavior is Level Triggered.
        const EDGE_TRIGGERED = EPOLLET as u32;
        /// A hang up happened on the associated file descriptor (`EPOLLHUP`). Note that
        /// `epoll_wait` will always wait for this event, so it is not necessary to set it
        /// in the requested events.
        const HANG_UP = EPOLLHUP as u32;
        /// There is an exceptional condition on the file descriptor (`EPOLLPRI`). It is
        /// mostly used to signal high priority data.
        const PRIORITY = EPOLLPRI as u32;
        /// Mirrors `EPOLLWAKEUP`.
        ///
        /// The event is considered as being "processed" from the time when it is returned
        /// by a call to `epoll_wait` until the next call to `epoll_wait` on the same
        /// epoll file descriptor, the closure of that file descriptor, the removal of the
        /// event file descriptor via EPOLL_CTL_DEL, or the clearing of EPOLLWAKEUP
        /// for the event file descriptor via EPOLL_CTL_MOD.
        const WAKE_UP = EPOLLWAKEUP as u32;
        /// Sets the one-shot behavior for the associated file descriptor (`EPOLLONESHOT`).
        const ONE_SHOT = EPOLLONESHOT as u32;
        /// Sets an exclusive wake up mode for the epoll file descriptor that is being
        /// attached to the associated file descriptor (`EPOLLEXCLUSIVE`).
        /// When a wake up event occurs and multiple epoll file descriptors are attached to
        /// the same target file using this mode, one or more of the epoll file descriptors
        /// will receive an event with `epoll_wait`. The default here is for all those file
        /// descriptors to receive an event.
        const EXCLUSIVE = EPOLLEXCLUSIVE as u32;
    }
}
71 
/// Wrapper over
/// [`libc::epoll_event`](https://doc.rust-lang.org/1.8.0/libc/struct.epoll_event.html).
///
/// The wrapper is a single-field tuple struct holding exactly one `epoll_event`.
// `transparent` makes sure that this struct has the same layout (size and alignment) as
// the wrapped `epoll_event` from C. `Epoll` relies on this when it casts pointers to
// `EpollEvent` values into `*mut epoll_event` for the `epoll_ctl` and `epoll_wait` calls.
#[repr(transparent)]
#[derive(Clone, Copy)]
pub struct EpollEvent(epoll_event);
79 
80 impl std::fmt::Debug for EpollEvent {
fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result81     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
82         write!(f, "{{ events: {}, data: {} }}", self.events(), self.data())
83     }
84 }
85 
86 impl Deref for EpollEvent {
87     type Target = epoll_event;
deref(&self) -> &Self::Target88     fn deref(&self) -> &Self::Target {
89         &self.0
90     }
91 }
92 
93 impl Default for EpollEvent {
default() -> Self94     fn default() -> Self {
95         EpollEvent(epoll_event {
96             events: 0u32,
97             u64: 0u64,
98         })
99     }
100 }
101 
102 impl EpollEvent {
103     /// Create a new epoll_event instance.
104     ///
105     /// # Arguments
106     ///
107     /// `events` - contains an event mask.
108     /// `data` - a user data variable. `data` field can be a fd on which
109     ///          we want to monitor the events specified by `events`.
110     ///
111     /// # Examples
112     ///
113     /// ```
114     /// extern crate vmm_sys_util;
115     /// use vmm_sys_util::epoll::{EpollEvent, EventSet};
116     ///
117     /// let event = EpollEvent::new(EventSet::IN, 2);
118     /// ```
new(events: EventSet, data: u64) -> Self119     pub fn new(events: EventSet, data: u64) -> Self {
120         EpollEvent(epoll_event {
121             events: events.bits(),
122             u64: data,
123         })
124     }
125 
126     /// Returns the `events` from
127     /// ['libc::epoll_event'](https://doc.rust-lang.org/1.8.0/libc/struct.epoll_event.html).
128     ///
129     /// # Examples
130     ///
131     /// ```
132     /// extern crate vmm_sys_util;
133     /// use vmm_sys_util::epoll::{EpollEvent, EventSet};
134     ///
135     /// let event = EpollEvent::new(EventSet::IN, 2);
136     /// assert_eq!(event.events(), 1);
137     /// ```
events(&self) -> u32138     pub fn events(&self) -> u32 {
139         self.events
140     }
141 
142     /// Returns the `EventSet` corresponding to `epoll_event.events`.
143     ///
144     /// # Panics
145     ///
146     /// Panics if `libc::epoll_event` contains invalid events.
147     ///
148     ///
149     /// # Examples
150     ///
151     /// ```
152     /// extern crate vmm_sys_util;
153     /// use vmm_sys_util::epoll::{EpollEvent, EventSet};
154     ///
155     /// let event = EpollEvent::new(EventSet::IN, 2);
156     /// assert_eq!(event.event_set(), EventSet::IN);
157     /// ```
event_set(&self) -> EventSet158     pub fn event_set(&self) -> EventSet {
159         // This unwrap is safe because `epoll_events` can only be user created or
160         // initialized by the kernel. We trust the kernel to only send us valid
161         // events. The user can only initialize `epoll_events` using valid events.
162         EventSet::from_bits(self.events()).unwrap()
163     }
164 
165     /// Returns the `data` from the `libc::epoll_event`.
166     ///
167     /// # Examples
168     ///
169     /// ```
170     /// extern crate vmm_sys_util;
171     /// use vmm_sys_util::epoll::{EpollEvent, EventSet};
172     ///
173     /// let event = EpollEvent::new(EventSet::IN, 2);
174     /// assert_eq!(event.data(), 2);
175     /// ```
data(&self) -> u64176     pub fn data(&self) -> u64 {
177         self.u64
178     }
179 
180     /// Converts the `libc::epoll_event` data to a RawFd.
181     ///
182     /// This conversion is lossy when the data does not correspond to a RawFd
183     /// (data does not fit in a i32).
184     ///
185     /// # Examples
186     ///
187     /// ```
188     /// extern crate vmm_sys_util;
189     /// use vmm_sys_util::epoll::{EpollEvent, EventSet};
190     ///
191     /// let event = EpollEvent::new(EventSet::IN, 2);
192     /// assert_eq!(event.fd(), 2);
193     /// ```
fd(&self) -> RawFd194     pub fn fd(&self) -> RawFd {
195         self.u64 as i32
196     }
197 }
198 
/// Wrapper over epoll functionality.
///
/// Owns an epoll file descriptor: it is obtained from `epoll_create1` in `Epoll::new`
/// and closed when the value is dropped.
#[derive(Debug)]
pub struct Epoll {
    // Raw epoll file descriptor. It is passed to `epoll_ctl` / `epoll_wait` and is the
    // value returned by `as_raw_fd`.
    epoll_fd: RawFd,
}
204 
205 impl Epoll {
206     /// Create a new epoll file descriptor.
new() -> io::Result<Self>207     pub fn new() -> io::Result<Self> {
208         let epoll_fd = SyscallReturnCode(
209             // SAFETY: Safe because the return code is transformed by `into_result` in a `Result`.
210             unsafe { epoll_create1(EPOLL_CLOEXEC) },
211         )
212         .into_result()?;
213         Ok(Epoll { epoll_fd })
214     }
215 
216     /// Wrapper for `libc::epoll_ctl`.
217     ///
218     /// This can be used for adding, modifying or removing a file descriptor in the
219     /// interest list of the epoll instance.
220     ///
221     /// # Arguments
222     ///
223     /// * `operation` - refers to the action to be performed on the file descriptor.
224     /// * `fd` - the file descriptor on which we want to perform `operation`.
225     /// * `event` - refers to the `epoll_event` instance that is linked to `fd`.
226     ///
227     /// # Examples
228     ///
229     /// ```
230     /// extern crate vmm_sys_util;
231     ///
232     /// use std::os::unix::io::AsRawFd;
233     /// use vmm_sys_util::epoll::{ControlOperation, Epoll, EpollEvent, EventSet};
234     /// use vmm_sys_util::eventfd::EventFd;
235     ///
236     /// let epoll = Epoll::new().unwrap();
237     /// let event_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap();
238     /// epoll
239     ///     .ctl(
240     ///         ControlOperation::Add,
241     ///         event_fd.as_raw_fd() as i32,
242     ///         EpollEvent::new(EventSet::OUT, event_fd.as_raw_fd() as u64),
243     ///     )
244     ///     .unwrap();
245     /// epoll
246     ///     .ctl(
247     ///         ControlOperation::Modify,
248     ///         event_fd.as_raw_fd() as i32,
249     ///         EpollEvent::new(EventSet::IN, 4),
250     ///     )
251     ///     .unwrap();
252     /// ```
ctl(&self, operation: ControlOperation, fd: RawFd, event: EpollEvent) -> io::Result<()>253     pub fn ctl(&self, operation: ControlOperation, fd: RawFd, event: EpollEvent) -> io::Result<()> {
254         SyscallReturnCode(
255             // SAFETY: Safe because we give a valid epoll file descriptor, a valid file descriptor
256             // to watch, as well as a valid epoll_event structure. We also check the return value.
257             unsafe {
258                 epoll_ctl(
259                     self.epoll_fd,
260                     operation as i32,
261                     fd,
262                     &event as *const EpollEvent as *mut epoll_event,
263                 )
264             },
265         )
266         .into_empty_result()
267     }
268 
269     /// Wrapper for `libc::epoll_wait`.
270     /// Returns the number of file descriptors in the interest list that became ready
271     /// for I/O or `errno` if an error occurred.
272     ///
273     /// # Arguments
274     ///
275     /// * `timeout` - specifies for how long the `epoll_wait` system call will block
276     ///               (measured in milliseconds).
277     /// * `events` - points to a memory area that will be used for storing the events
278     ///              returned by `epoll_wait()` call.
279     ///
280     /// # Examples
281     ///
282     /// ```
283     /// extern crate vmm_sys_util;
284     ///
285     /// use std::os::unix::io::AsRawFd;
286     /// use vmm_sys_util::epoll::{ControlOperation, Epoll, EpollEvent, EventSet};
287     /// use vmm_sys_util::eventfd::EventFd;
288     ///
289     /// let epoll = Epoll::new().unwrap();
290     /// let event_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap();
291     ///
292     /// let mut ready_events = vec![EpollEvent::default(); 10];
293     /// epoll
294     ///     .ctl(
295     ///         ControlOperation::Add,
296     ///         event_fd.as_raw_fd() as i32,
297     ///         EpollEvent::new(EventSet::OUT, 4),
298     ///     )
299     ///     .unwrap();
300     /// let ev_count = epoll.wait(-1, &mut ready_events[..]).unwrap();
301     /// assert_eq!(ev_count, 1);
302     /// ```
wait(&self, timeout: i32, events: &mut [EpollEvent]) -> io::Result<usize>303     pub fn wait(&self, timeout: i32, events: &mut [EpollEvent]) -> io::Result<usize> {
304         let events_count = SyscallReturnCode(
305             // SAFETY: Safe because we give a valid epoll file descriptor and an array of
306             // epoll_event structures that will be modified by the kernel to indicate information
307             // about the subset of file descriptors in the interest list.
308             // We also check the return value.
309             unsafe {
310                 epoll_wait(
311                     self.epoll_fd,
312                     events.as_mut_ptr() as *mut epoll_event,
313                     events.len() as i32,
314                     timeout,
315                 )
316             },
317         )
318         .into_result()? as usize;
319 
320         Ok(events_count)
321     }
322 }
323 
324 impl AsRawFd for Epoll {
as_raw_fd(&self) -> RawFd325     fn as_raw_fd(&self) -> RawFd {
326         self.epoll_fd
327     }
328 }
329 
330 impl Drop for Epoll {
drop(&mut self)331     fn drop(&mut self) {
332         // SAFETY: Safe because this fd is opened with `epoll_create` and we trust
333         // the kernel to give us a valid fd.
334         unsafe {
335             libc::close(self.epoll_fd);
336         }
337     }
338 }
339 
#[cfg(test)]
mod tests {
    use super::*;

    use crate::eventfd::EventFd;

    /// A default event is all zeroes; `new` stores the mask and data it is given.
    #[test]
    fn test_event_ops() {
        let zeroed = EpollEvent::default();
        assert_eq!(zeroed.events(), 0);
        assert_eq!(zeroed.data(), 0);

        let readable = EpollEvent::new(EventSet::IN, 2);
        assert_eq!(readable.events(), 1);
        assert_eq!(readable.event_set(), EventSet::IN);

        assert_eq!(readable.data(), 2);
        assert_eq!(readable.fd(), 2);
    }

    /// The `Debug` output shows the raw mask and the user data.
    #[test]
    fn test_events_debug() {
        let rendered = format!("{:?}", EpollEvent::new(EventSet::IN, 42));
        assert_eq!(rendered, "{ events: 1, data: 42 }");
    }

    /// Exercises `ctl` (add, modify, delete) and `wait` against real eventfds.
    #[test]
    fn test_epoll() {
        const WAIT_TIMEOUT_MS: i32 = 250;
        const EVENT_BUFFER_SIZE: usize = 128;

        let in_and_out = EventSet::IN | EventSet::OUT;
        let out_only = EventSet::OUT;

        let epoll = Epoll::new().unwrap();
        assert_eq!(epoll.epoll_fd, epoll.as_raw_fd());

        // ---- EPOLL_CTL_ADD ----

        // An eventfd reports EPOLLOUT as long as a value of at least 1 can be written to
        // its counter without blocking. Once a value greater than 0 has been written, the
        // fd is available for EPOLLIN events too.
        let event_fd_1 = EventFd::new(libc::EFD_NONBLOCK).unwrap();
        event_fd_1.write(1).unwrap();
        let fd_1 = event_fd_1.as_raw_fd();
        let first_registration = EpollEvent::new(in_and_out, fd_1 as u64);

        // Register fds with different event masks in the interest list.
        epoll
            .ctl(ControlOperation::Add, fd_1, first_registration)
            .unwrap();

        // The same fd cannot be added twice to the interest list.
        assert!(epoll
            .ctl(ControlOperation::Add, fd_1, first_registration)
            .is_err());

        // For the second fd, `data` is deliberately something other than the fd value and
        // the mask leaves EPOLLIN out.
        let event_fd_2 = EventFd::new(libc::EFD_NONBLOCK).unwrap();
        event_fd_2.write(1).unwrap();
        let fd_2 = event_fd_2.as_raw_fd();
        epoll
            .ctl(ControlOperation::Add, fd_2, EpollEvent::new(out_only, 10))
            .unwrap();

        // Nothing is written to the third eventfd, so EPOLLIN must not be reported for
        // it even though we ask to monitor that event.
        let event_fd_3 = EventFd::new(libc::EFD_NONBLOCK).unwrap();
        let fd_3 = event_fd_3.as_raw_fd();
        epoll
            .ctl(
                ControlOperation::Add,
                fd_3,
                EpollEvent::new(in_and_out, fd_3 as u64),
            )
            .unwrap();

        // ---- epoll_wait ----

        let mut ready_events = vec![EpollEvent::default(); EVENT_BUFFER_SIZE];
        let mut ready = epoll
            .wait(WAIT_TIMEOUT_MS, ready_events.as_mut_slice())
            .unwrap();

        // All three fds are expected in the ready list.
        assert_eq!(ready, 3);

        // Check the (data, events) pair reported for each of them:
        // * fd 1: its own fd as data, readable and writable;
        // * fd 2: the arbitrary data value, EPOLLOUT only because EPOLLIN was not requested;
        // * fd 3: its own fd as data, EPOLLOUT only because its counter is still 0.
        let reported: Vec<(u64, u32)> = ready_events[..ready]
            .iter()
            .map(|event| (event.data(), event.events()))
            .collect();
        let expected = vec![
            (fd_1 as u64, in_and_out.bits()),
            (10, out_only.bits()),
            (fd_3 as u64, out_only.bits()),
        ];
        assert_eq!(reported, expected);

        // ---- EPOLL_CTL_MOD ----

        // Replace the registration of `event_fd_1` with a different mask and data value.
        epoll
            .ctl(
                ControlOperation::Modify,
                fd_1,
                EpollEvent::new(out_only, 20),
            )
            .unwrap();

        // A fd that was never added to the interest list cannot be modified.
        let event_fd_4 = EventFd::new(libc::EFD_NONBLOCK).unwrap();
        let fd_4 = event_fd_4.as_raw_fd();
        assert!(epoll
            .ctl(ControlOperation::Modify, fd_4, EpollEvent::default())
            .is_err());

        let _ready = epoll
            .wait(WAIT_TIMEOUT_MS, ready_events.as_mut_slice())
            .unwrap();

        // The new data and mask must now be reported for `event_fd_1`.
        assert_eq!(ready_events[0].data(), 20);
        assert_eq!(ready_events[0].events(), out_only.bits());

        // Stop monitoring any event on `event_fd_1`.
        epoll
            .ctl(ControlOperation::Modify, fd_1, EpollEvent::default())
            .unwrap();

        // Only two fds are expected to remain in the ready list.
        ready = epoll
            .wait(WAIT_TIMEOUT_MS, ready_events.as_mut_slice())
            .unwrap();
        assert_eq!(ready, 2);

        // ---- EPOLL_CTL_DEL ----

        epoll
            .ctl(ControlOperation::Delete, fd_2, EpollEvent::default())
            .unwrap();

        // Only `event_fd_3` is expected to remain in the ready list.
        ready = epoll
            .wait(WAIT_TIMEOUT_MS, ready_events.as_mut_slice())
            .unwrap();

        assert_eq!(ready, 1);
        assert_eq!(ready_events[0].data(), fd_3 as u64);
        assert_eq!(ready_events[0].events(), out_only.bits());

        // Removing a fd that was never added to the interest list fails.
        assert!(epoll
            .ctl(ControlOperation::Delete, fd_4, EpollEvent::default())
            .is_err());
    }
}
508