1 // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 // SPDX-License-Identifier: BSD-3-Clause 3 4 //! Safe wrappers over the 5 //! [`epoll`](http://man7.org/linux/man-pages/man7/epoll.7.html) API. 6 7 use std::io; 8 use std::ops::{Deref, Drop}; 9 use std::os::unix::io::{AsRawFd, RawFd}; 10 11 #[cfg(any(target_os = "linux", target_os = "android"))] 12 use bitflags::bitflags; 13 use libc::{ 14 epoll_create1, epoll_ctl, epoll_event, epoll_wait, EPOLLERR, EPOLLET, EPOLLEXCLUSIVE, EPOLLHUP, 15 EPOLLIN, EPOLLONESHOT, EPOLLOUT, EPOLLPRI, EPOLLRDHUP, EPOLLWAKEUP, EPOLL_CLOEXEC, 16 EPOLL_CTL_ADD, EPOLL_CTL_DEL, EPOLL_CTL_MOD, 17 }; 18 19 use crate::syscall::SyscallReturnCode; 20 21 /// Wrapper over `EPOLL_CTL_*` operations that can be performed on a file descriptor. 22 #[derive(Debug)] 23 #[repr(i32)] 24 pub enum ControlOperation { 25 /// Add a file descriptor to the interest list. 26 Add = EPOLL_CTL_ADD, 27 /// Change the settings associated with a file descriptor that is 28 /// already in the interest list. 29 Modify = EPOLL_CTL_MOD, 30 /// Remove a file descriptor from the interest list. 31 Delete = EPOLL_CTL_DEL, 32 } 33 34 bitflags! { 35 /// The type of events we can monitor a file descriptor for. 36 pub struct EventSet: u32 { 37 /// The associated file descriptor is available for read operations. 38 const IN = EPOLLIN as u32; 39 /// The associated file descriptor is available for write operations. 40 const OUT = EPOLLOUT as u32; 41 /// Error condition happened on the associated file descriptor. 42 const ERROR = EPOLLERR as u32; 43 /// This can be used to detect peer shutdown when using Edge Triggered monitoring. 44 const READ_HANG_UP = EPOLLRDHUP as u32; 45 /// Sets the Edge Triggered behavior for the associated file descriptor. 46 /// The default behavior is Level Triggered. 47 const EDGE_TRIGGERED = EPOLLET as u32; 48 /// Hang up happened on the associated file descriptor. Note that `epoll_wait` 49 /// will always wait for this event and it is not necessary to set it in events. 50 const HANG_UP = EPOLLHUP as u32; 51 /// There is an exceptional condition on that file descriptor. It is mostly used to 52 /// set high priority for some data. 53 const PRIORITY = EPOLLPRI as u32; 54 /// The event is considered as being "processed" from the time when it is returned 55 /// by a call to `epoll_wait` until the next call to `epoll_wait` on the same 56 /// epoll file descriptor, the closure of that file descriptor, the removal of the 57 /// event file descriptor via EPOLL_CTL_DEL, or the clearing of EPOLLWAKEUP 58 /// for the event file descriptor via EPOLL_CTL_MOD. 59 const WAKE_UP = EPOLLWAKEUP as u32; 60 /// Sets the one-shot behavior for the associated file descriptor. 61 const ONE_SHOT = EPOLLONESHOT as u32; 62 /// Sets an exclusive wake up mode for the epoll file descriptor that is being 63 /// attached to the associated file descriptor. 64 /// When a wake up event occurs and multiple epoll file descriptors are attached to 65 /// the same target file using this mode, one or more of the epoll file descriptors 66 /// will receive an event with `epoll_wait`. The default here is for all those file 67 /// descriptors to receive an event. 68 const EXCLUSIVE = EPOLLEXCLUSIVE as u32; 69 } 70 } 71 72 /// Wrapper over 73 /// ['libc::epoll_event'](https://doc.rust-lang.org/1.8.0/libc/struct.epoll_event.html). 74 // We are using `transparent` here to be super sure that this struct and its fields 75 // have the same alignment as those from the `epoll_event` struct from C. 76 #[repr(transparent)] 77 #[derive(Clone, Copy)] 78 pub struct EpollEvent(epoll_event); 79 80 impl std::fmt::Debug for EpollEvent { fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result81 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 82 write!(f, "{{ events: {}, data: {} }}", self.events(), self.data()) 83 } 84 } 85 86 impl Deref for EpollEvent { 87 type Target = epoll_event; deref(&self) -> &Self::Target88 fn deref(&self) -> &Self::Target { 89 &self.0 90 } 91 } 92 93 impl Default for EpollEvent { default() -> Self94 fn default() -> Self { 95 EpollEvent(epoll_event { 96 events: 0u32, 97 u64: 0u64, 98 }) 99 } 100 } 101 102 impl EpollEvent { 103 /// Create a new epoll_event instance. 104 /// 105 /// # Arguments 106 /// 107 /// `events` - contains an event mask. 108 /// `data` - a user data variable. `data` field can be a fd on which 109 /// we want to monitor the events specified by `events`. 110 /// 111 /// # Examples 112 /// 113 /// ``` 114 /// extern crate vmm_sys_util; 115 /// use vmm_sys_util::epoll::{EpollEvent, EventSet}; 116 /// 117 /// let event = EpollEvent::new(EventSet::IN, 2); 118 /// ``` new(events: EventSet, data: u64) -> Self119 pub fn new(events: EventSet, data: u64) -> Self { 120 EpollEvent(epoll_event { 121 events: events.bits(), 122 u64: data, 123 }) 124 } 125 126 /// Returns the `events` from 127 /// ['libc::epoll_event'](https://doc.rust-lang.org/1.8.0/libc/struct.epoll_event.html). 128 /// 129 /// # Examples 130 /// 131 /// ``` 132 /// extern crate vmm_sys_util; 133 /// use vmm_sys_util::epoll::{EpollEvent, EventSet}; 134 /// 135 /// let event = EpollEvent::new(EventSet::IN, 2); 136 /// assert_eq!(event.events(), 1); 137 /// ``` events(&self) -> u32138 pub fn events(&self) -> u32 { 139 self.events 140 } 141 142 /// Returns the `EventSet` corresponding to `epoll_event.events`. 143 /// 144 /// # Panics 145 /// 146 /// Panics if `libc::epoll_event` contains invalid events. 147 /// 148 /// 149 /// # Examples 150 /// 151 /// ``` 152 /// extern crate vmm_sys_util; 153 /// use vmm_sys_util::epoll::{EpollEvent, EventSet}; 154 /// 155 /// let event = EpollEvent::new(EventSet::IN, 2); 156 /// assert_eq!(event.event_set(), EventSet::IN); 157 /// ``` event_set(&self) -> EventSet158 pub fn event_set(&self) -> EventSet { 159 // This unwrap is safe because `epoll_events` can only be user created or 160 // initialized by the kernel. We trust the kernel to only send us valid 161 // events. The user can only initialize `epoll_events` using valid events. 162 EventSet::from_bits(self.events()).unwrap() 163 } 164 165 /// Returns the `data` from the `libc::epoll_event`. 166 /// 167 /// # Examples 168 /// 169 /// ``` 170 /// extern crate vmm_sys_util; 171 /// use vmm_sys_util::epoll::{EpollEvent, EventSet}; 172 /// 173 /// let event = EpollEvent::new(EventSet::IN, 2); 174 /// assert_eq!(event.data(), 2); 175 /// ``` data(&self) -> u64176 pub fn data(&self) -> u64 { 177 self.u64 178 } 179 180 /// Converts the `libc::epoll_event` data to a RawFd. 181 /// 182 /// This conversion is lossy when the data does not correspond to a RawFd 183 /// (data does not fit in a i32). 184 /// 185 /// # Examples 186 /// 187 /// ``` 188 /// extern crate vmm_sys_util; 189 /// use vmm_sys_util::epoll::{EpollEvent, EventSet}; 190 /// 191 /// let event = EpollEvent::new(EventSet::IN, 2); 192 /// assert_eq!(event.fd(), 2); 193 /// ``` fd(&self) -> RawFd194 pub fn fd(&self) -> RawFd { 195 self.u64 as i32 196 } 197 } 198 199 /// Wrapper over epoll functionality. 200 #[derive(Debug)] 201 pub struct Epoll { 202 epoll_fd: RawFd, 203 } 204 205 impl Epoll { 206 /// Create a new epoll file descriptor. new() -> io::Result<Self>207 pub fn new() -> io::Result<Self> { 208 let epoll_fd = SyscallReturnCode( 209 // SAFETY: Safe because the return code is transformed by `into_result` in a `Result`. 210 unsafe { epoll_create1(EPOLL_CLOEXEC) }, 211 ) 212 .into_result()?; 213 Ok(Epoll { epoll_fd }) 214 } 215 216 /// Wrapper for `libc::epoll_ctl`. 217 /// 218 /// This can be used for adding, modifying or removing a file descriptor in the 219 /// interest list of the epoll instance. 220 /// 221 /// # Arguments 222 /// 223 /// * `operation` - refers to the action to be performed on the file descriptor. 224 /// * `fd` - the file descriptor on which we want to perform `operation`. 225 /// * `event` - refers to the `epoll_event` instance that is linked to `fd`. 226 /// 227 /// # Examples 228 /// 229 /// ``` 230 /// extern crate vmm_sys_util; 231 /// 232 /// use std::os::unix::io::AsRawFd; 233 /// use vmm_sys_util::epoll::{ControlOperation, Epoll, EpollEvent, EventSet}; 234 /// use vmm_sys_util::eventfd::EventFd; 235 /// 236 /// let epoll = Epoll::new().unwrap(); 237 /// let event_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap(); 238 /// epoll 239 /// .ctl( 240 /// ControlOperation::Add, 241 /// event_fd.as_raw_fd() as i32, 242 /// EpollEvent::new(EventSet::OUT, event_fd.as_raw_fd() as u64), 243 /// ) 244 /// .unwrap(); 245 /// epoll 246 /// .ctl( 247 /// ControlOperation::Modify, 248 /// event_fd.as_raw_fd() as i32, 249 /// EpollEvent::new(EventSet::IN, 4), 250 /// ) 251 /// .unwrap(); 252 /// ``` ctl(&self, operation: ControlOperation, fd: RawFd, event: EpollEvent) -> io::Result<()>253 pub fn ctl(&self, operation: ControlOperation, fd: RawFd, event: EpollEvent) -> io::Result<()> { 254 SyscallReturnCode( 255 // SAFETY: Safe because we give a valid epoll file descriptor, a valid file descriptor 256 // to watch, as well as a valid epoll_event structure. We also check the return value. 257 unsafe { 258 epoll_ctl( 259 self.epoll_fd, 260 operation as i32, 261 fd, 262 &event as *const EpollEvent as *mut epoll_event, 263 ) 264 }, 265 ) 266 .into_empty_result() 267 } 268 269 /// Wrapper for `libc::epoll_wait`. 270 /// Returns the number of file descriptors in the interest list that became ready 271 /// for I/O or `errno` if an error occurred. 272 /// 273 /// # Arguments 274 /// 275 /// * `timeout` - specifies for how long the `epoll_wait` system call will block 276 /// (measured in milliseconds). 277 /// * `events` - points to a memory area that will be used for storing the events 278 /// returned by `epoll_wait()` call. 279 /// 280 /// # Examples 281 /// 282 /// ``` 283 /// extern crate vmm_sys_util; 284 /// 285 /// use std::os::unix::io::AsRawFd; 286 /// use vmm_sys_util::epoll::{ControlOperation, Epoll, EpollEvent, EventSet}; 287 /// use vmm_sys_util::eventfd::EventFd; 288 /// 289 /// let epoll = Epoll::new().unwrap(); 290 /// let event_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap(); 291 /// 292 /// let mut ready_events = vec![EpollEvent::default(); 10]; 293 /// epoll 294 /// .ctl( 295 /// ControlOperation::Add, 296 /// event_fd.as_raw_fd() as i32, 297 /// EpollEvent::new(EventSet::OUT, 4), 298 /// ) 299 /// .unwrap(); 300 /// let ev_count = epoll.wait(-1, &mut ready_events[..]).unwrap(); 301 /// assert_eq!(ev_count, 1); 302 /// ``` wait(&self, timeout: i32, events: &mut [EpollEvent]) -> io::Result<usize>303 pub fn wait(&self, timeout: i32, events: &mut [EpollEvent]) -> io::Result<usize> { 304 let events_count = SyscallReturnCode( 305 // SAFETY: Safe because we give a valid epoll file descriptor and an array of 306 // epoll_event structures that will be modified by the kernel to indicate information 307 // about the subset of file descriptors in the interest list. 308 // We also check the return value. 309 unsafe { 310 epoll_wait( 311 self.epoll_fd, 312 events.as_mut_ptr() as *mut epoll_event, 313 events.len() as i32, 314 timeout, 315 ) 316 }, 317 ) 318 .into_result()? as usize; 319 320 Ok(events_count) 321 } 322 } 323 324 impl AsRawFd for Epoll { as_raw_fd(&self) -> RawFd325 fn as_raw_fd(&self) -> RawFd { 326 self.epoll_fd 327 } 328 } 329 330 impl Drop for Epoll { drop(&mut self)331 fn drop(&mut self) { 332 // SAFETY: Safe because this fd is opened with `epoll_create` and we trust 333 // the kernel to give us a valid fd. 334 unsafe { 335 libc::close(self.epoll_fd); 336 } 337 } 338 } 339 340 #[cfg(test)] 341 mod tests { 342 use super::*; 343 344 use crate::eventfd::EventFd; 345 346 #[test] test_event_ops()347 fn test_event_ops() { 348 let mut event = EpollEvent::default(); 349 assert_eq!(event.events(), 0); 350 assert_eq!(event.data(), 0); 351 352 event = EpollEvent::new(EventSet::IN, 2); 353 assert_eq!(event.events(), 1); 354 assert_eq!(event.event_set(), EventSet::IN); 355 356 assert_eq!(event.data(), 2); 357 assert_eq!(event.fd(), 2); 358 } 359 360 #[test] test_events_debug()361 fn test_events_debug() { 362 let events = EpollEvent::new(EventSet::IN, 42); 363 assert_eq!(format!("{:?}", events), "{ events: 1, data: 42 }") 364 } 365 366 #[test] test_epoll()367 fn test_epoll() { 368 const DEFAULT__TIMEOUT: i32 = 250; 369 const EVENT_BUFFER_SIZE: usize = 128; 370 371 let epoll = Epoll::new().unwrap(); 372 assert_eq!(epoll.epoll_fd, epoll.as_raw_fd()); 373 374 // Let's test different scenarios for `epoll_ctl()` and `epoll_wait()` functionality. 375 376 let event_fd_1 = EventFd::new(libc::EFD_NONBLOCK).unwrap(); 377 // For EPOLLOUT to be available it is enough only to be possible to write a value of 378 // at least 1 to the eventfd counter without blocking. 379 // If we write a value greater than 0 to this counter, the fd will be available for 380 // EPOLLIN events too. 381 event_fd_1.write(1).unwrap(); 382 383 let mut event_1 = 384 EpollEvent::new(EventSet::IN | EventSet::OUT, event_fd_1.as_raw_fd() as u64); 385 386 // For EPOLL_CTL_ADD behavior we will try to add some fds with different event masks into 387 // the interest list of epoll instance. 388 assert!(epoll 389 .ctl(ControlOperation::Add, event_fd_1.as_raw_fd(), event_1) 390 .is_ok()); 391 392 // We can't add twice the same fd to epoll interest list. 393 assert!(epoll 394 .ctl(ControlOperation::Add, event_fd_1.as_raw_fd(), event_1) 395 .is_err()); 396 397 let event_fd_2 = EventFd::new(libc::EFD_NONBLOCK).unwrap(); 398 event_fd_2.write(1).unwrap(); 399 assert!(epoll 400 .ctl( 401 ControlOperation::Add, 402 event_fd_2.as_raw_fd(), 403 // For this fd, we want an Event instance that has `data` field set to other 404 // value than the value of the fd and `events` without EPOLLIN type set. 405 EpollEvent::new(EventSet::OUT, 10) 406 ) 407 .is_ok()); 408 409 // For the following eventfd we won't write anything to its counter, so we expect EPOLLIN 410 // event to not be available for this fd, even if we say that we want to monitor this type 411 // of event via EPOLL_CTL_ADD operation. 412 let event_fd_3 = EventFd::new(libc::EFD_NONBLOCK).unwrap(); 413 let event_3 = EpollEvent::new(EventSet::OUT | EventSet::IN, event_fd_3.as_raw_fd() as u64); 414 assert!(epoll 415 .ctl(ControlOperation::Add, event_fd_3.as_raw_fd(), event_3) 416 .is_ok()); 417 418 // Let's check `epoll_wait()` behavior for our epoll instance. 419 let mut ready_events = vec![EpollEvent::default(); EVENT_BUFFER_SIZE]; 420 let mut ev_count = epoll.wait(DEFAULT__TIMEOUT, &mut ready_events[..]).unwrap(); 421 422 // We expect to have 3 fds in the ready list of epoll instance. 423 assert_eq!(ev_count, 3); 424 425 // Let's check also the Event values that are now returned in the ready list. 426 assert_eq!(ready_events[0].data(), event_fd_1.as_raw_fd() as u64); 427 // For this fd, `data` field was populated with random data instead of the 428 // corresponding fd value. 429 assert_eq!(ready_events[1].data(), 10); 430 assert_eq!(ready_events[2].data(), event_fd_3.as_raw_fd() as u64); 431 432 // EPOLLIN and EPOLLOUT should be available for this fd. 433 assert_eq!( 434 ready_events[0].events(), 435 (EventSet::IN | EventSet::OUT).bits() 436 ); 437 // Only EPOLLOUT is expected because we didn't want to monitor EPOLLIN on this fd. 438 assert_eq!(ready_events[1].events(), EventSet::OUT.bits()); 439 // Only EPOLLOUT too because eventfd counter value is 0 (we didn't write a value 440 // greater than 0 to it). 441 assert_eq!(ready_events[2].events(), EventSet::OUT.bits()); 442 443 // Now we're gonna modify the Event instance for a fd to test EPOLL_CTL_MOD 444 // behavior. 445 // We create here a new Event with some events, other than those previously set, 446 // that we want to monitor this time on event_fd_1. 447 event_1 = EpollEvent::new(EventSet::OUT, 20); 448 assert!(epoll 449 .ctl(ControlOperation::Modify, event_fd_1.as_raw_fd(), event_1) 450 .is_ok()); 451 452 let event_fd_4 = EventFd::new(libc::EFD_NONBLOCK).unwrap(); 453 // Can't modify a fd that wasn't added to epoll interest list. 454 assert!(epoll 455 .ctl( 456 ControlOperation::Modify, 457 event_fd_4.as_raw_fd(), 458 EpollEvent::default() 459 ) 460 .is_err()); 461 462 let _ = epoll.wait(DEFAULT__TIMEOUT, &mut ready_events[..]).unwrap(); 463 464 // Let's check that Event fields were indeed changed for the `event_fd_1` fd. 465 assert_eq!(ready_events[0].data(), 20); 466 // EPOLLOUT is now available for this fd as we've intended with EPOLL_CTL_MOD operation. 467 assert_eq!(ready_events[0].events(), EventSet::OUT.bits()); 468 469 // Now let's set for a fd to not have any events monitored. 470 assert!(epoll 471 .ctl( 472 ControlOperation::Modify, 473 event_fd_1.as_raw_fd(), 474 EpollEvent::default() 475 ) 476 .is_ok()); 477 478 // In this particular case we expect to remain only with 2 fds in the ready list. 479 ev_count = epoll.wait(DEFAULT__TIMEOUT, &mut ready_events[..]).unwrap(); 480 assert_eq!(ev_count, 2); 481 482 // Let's also delete a fd from the interest list. 483 assert!(epoll 484 .ctl( 485 ControlOperation::Delete, 486 event_fd_2.as_raw_fd(), 487 EpollEvent::default() 488 ) 489 .is_ok()); 490 491 // We expect to have only one fd remained in the ready list (event_fd_3). 492 ev_count = epoll.wait(DEFAULT__TIMEOUT, &mut ready_events[..]).unwrap(); 493 494 assert_eq!(ev_count, 1); 495 assert_eq!(ready_events[0].data(), event_fd_3.as_raw_fd() as u64); 496 assert_eq!(ready_events[0].events(), EventSet::OUT.bits()); 497 498 // If we try to remove a fd from epoll interest list that wasn't added before it will fail. 499 assert!(epoll 500 .ctl( 501 ControlOperation::Delete, 502 event_fd_4.as_raw_fd(), 503 EpollEvent::default() 504 ) 505 .is_err()); 506 } 507 } 508