1 // Copyright 2019 Intel Corporation. All Rights Reserved. 2 // 3 // Copyright 2017 The Chromium OS Authors. All rights reserved. 4 // 5 // SPDX-License-Identifier: BSD-3-Clause 6 7 //! Traits and structures for working with 8 //! [`epoll`](http://man7.org/linux/man-pages/man7/epoll.7.html) 9 10 use std::cell::{Cell, Ref, RefCell}; 11 use std::cmp::min; 12 use std::fs::File; 13 use std::i32; 14 use std::i64; 15 use std::io::{stderr, Cursor, Write}; 16 use std::marker::PhantomData; 17 use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, RawFd}; 18 use std::ptr::null_mut; 19 use std::slice; 20 use std::thread; 21 use std::time::Duration; 22 23 use libc::{ 24 c_int, epoll_create1, epoll_ctl, epoll_event, epoll_wait, EINTR, EPOLLERR, EPOLLHUP, EPOLLIN, 25 EPOLLOUT, EPOLL_CLOEXEC, EPOLL_CTL_ADD, EPOLL_CTL_DEL, EPOLL_CTL_MOD, 26 }; 27 28 use crate::errno::{errno_result, Error, Result}; 29 30 macro_rules! handle_eintr_errno { 31 ($x:expr) => {{ 32 let mut res; 33 loop { 34 res = $x; 35 if res != -1 || Error::last() != Error::new(EINTR) { 36 break; 37 } 38 } 39 res 40 }}; 41 } 42 43 const POLL_CONTEXT_MAX_EVENTS: usize = 16; 44 45 /// A wrapper of raw `libc::epoll_event`. 46 /// 47 /// This should only be used with [`EpollContext`](struct.EpollContext.html). 48 pub struct EpollEvents(RefCell<[epoll_event; POLL_CONTEXT_MAX_EVENTS]>); 49 50 impl std::fmt::Debug for EpollEvents { fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result51 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 52 write!(f, "EpollEvents {{ ... }}") 53 } 54 } 55 56 impl EpollEvents { 57 /// Creates a new EpollEvents. 
new() -> EpollEvents58 pub fn new() -> EpollEvents { 59 EpollEvents(RefCell::new( 60 [epoll_event { events: 0, u64: 0 }; POLL_CONTEXT_MAX_EVENTS], 61 )) 62 } 63 } 64 65 impl Default for EpollEvents { default() -> Self66 fn default() -> Self { 67 Self::new() 68 } 69 } 70 71 /// Trait for a token that can be associated with an `fd` in a [`PollContext`](struct.PollContext.html). 72 /// 73 /// Simple enums that have no or primitive variant data can use the `#[derive(PollToken)]` 74 /// custom derive to implement this trait. 75 pub trait PollToken { 76 /// Converts this token into a u64 that can be turned back into a token via `from_raw_token`. as_raw_token(&self) -> u6477 fn as_raw_token(&self) -> u64; 78 79 /// Converts a raw token as returned from `as_raw_token` back into a token. 80 /// 81 /// It is invalid to give a raw token that was not returned via `as_raw_token` from the same 82 /// `Self`. The implementation can expect that this will never happen as a result of its usage 83 /// in `PollContext`. 
from_raw_token(data: u64) -> Self84 fn from_raw_token(data: u64) -> Self; 85 } 86 87 impl PollToken for usize { as_raw_token(&self) -> u6488 fn as_raw_token(&self) -> u64 { 89 *self as u64 90 } 91 from_raw_token(data: u64) -> Self92 fn from_raw_token(data: u64) -> Self { 93 data as Self 94 } 95 } 96 97 impl PollToken for u64 { as_raw_token(&self) -> u6498 fn as_raw_token(&self) -> u64 { 99 *self 100 } 101 from_raw_token(data: u64) -> Self102 fn from_raw_token(data: u64) -> Self { 103 data as Self 104 } 105 } 106 107 impl PollToken for u32 { as_raw_token(&self) -> u64108 fn as_raw_token(&self) -> u64 { 109 u64::from(*self) 110 } 111 from_raw_token(data: u64) -> Self112 fn from_raw_token(data: u64) -> Self { 113 data as Self 114 } 115 } 116 117 impl PollToken for u16 { as_raw_token(&self) -> u64118 fn as_raw_token(&self) -> u64 { 119 u64::from(*self) 120 } 121 from_raw_token(data: u64) -> Self122 fn from_raw_token(data: u64) -> Self { 123 data as Self 124 } 125 } 126 127 impl PollToken for u8 { as_raw_token(&self) -> u64128 fn as_raw_token(&self) -> u64 { 129 u64::from(*self) 130 } 131 from_raw_token(data: u64) -> Self132 fn from_raw_token(data: u64) -> Self { 133 data as Self 134 } 135 } 136 137 impl PollToken for () { as_raw_token(&self) -> u64138 fn as_raw_token(&self) -> u64 { 139 0 140 } 141 from_raw_token(_data: u64) -> Self142 fn from_raw_token(_data: u64) -> Self {} 143 } 144 145 /// An event returned by [`PollContext::wait`](struct.PollContext.html#method.wait). 
146 pub struct PollEvent<'a, T> { 147 event: &'a epoll_event, 148 token: PhantomData<T>, // Needed to satisfy usage of T 149 } 150 151 impl<T> std::fmt::Debug for PollEvent<'_, T> { fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result152 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 153 f.debug_struct("PollEvent") 154 .field("event", &"?") 155 .field("token", &self.token) 156 .finish() 157 } 158 } 159 160 impl<'a, T: PollToken> PollEvent<'a, T> { 161 /// Gets the token associated in 162 /// [`PollContext::add`](struct.PollContext.html#method.add) with this event. token(&self) -> T163 pub fn token(&self) -> T { 164 T::from_raw_token(self.event.u64) 165 } 166 167 /// Get the raw events returned by the kernel. raw_events(&self) -> u32168 pub fn raw_events(&self) -> u32 { 169 self.event.events 170 } 171 172 /// Checks if the event is readable. 173 /// 174 /// True if the `fd` associated with this token in 175 /// [`PollContext::add`](struct.PollContext.html#method.add) is readable. readable(&self) -> bool176 pub fn readable(&self) -> bool { 177 self.event.events & (EPOLLIN as u32) != 0 178 } 179 180 /// Checks if the event is writable. 181 /// 182 /// True if the `fd` associated with this token in 183 /// [`PollContext::add`](struct.PollContext.html#method.add) is writable. writable(&self) -> bool184 pub fn writable(&self) -> bool { 185 self.event.events & (EPOLLOUT as u32) != 0 186 } 187 188 /// Checks if the event has been hangup on. 189 /// 190 /// True if the `fd` associated with this token in 191 /// [`PollContext::add`](struct.PollContext.html#method.add) has been hungup on. hungup(&self) -> bool192 pub fn hungup(&self) -> bool { 193 self.event.events & (EPOLLHUP as u32) != 0 194 } 195 196 /// Checks if the event has associated error conditions. 197 /// 198 /// True if the `fd` associated with this token in 199 /// [`PollContext::add`](struct.PollContext.html#method.add) has associated error conditions. 
has_error(&self) -> bool200 pub fn has_error(&self) -> bool { 201 self.event.events & (EPOLLERR as u32) != 0 202 } 203 } 204 205 /// An iterator over a subset of events returned by 206 /// [`PollContext::wait`](struct.PollContext.html#method.wait). 207 #[derive(Debug)] 208 pub struct PollEventIter<'a, I, T> 209 where 210 I: Iterator<Item = &'a epoll_event>, 211 { 212 mask: u32, 213 iter: I, 214 tokens: PhantomData<[T]>, // Needed to satisfy usage of T 215 } 216 217 impl<'a, I, T> Iterator for PollEventIter<'a, I, T> 218 where 219 I: Iterator<Item = &'a epoll_event>, 220 T: PollToken, 221 { 222 type Item = PollEvent<'a, T>; next(&mut self) -> Option<Self::Item>223 fn next(&mut self) -> Option<Self::Item> { 224 let mask = self.mask; 225 self.iter 226 .find(|event| (event.events & mask) != 0) 227 .map(|event| PollEvent { 228 event, 229 token: PhantomData, 230 }) 231 } 232 } 233 234 /// The list of events returned by [`PollContext::wait`](struct.PollContext.html#method.wait). 235 pub struct PollEvents<'a, T> { 236 count: usize, 237 events: Ref<'a, [epoll_event; POLL_CONTEXT_MAX_EVENTS]>, 238 tokens: PhantomData<[T]>, // Needed to satisfy usage of T 239 } 240 241 impl<T> std::fmt::Debug for PollEvents<'_, T> { fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result242 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 243 f.debug_struct("PollEventsOwned") 244 .field("count", &self.count) 245 .field("events", &"?") 246 .field("tokens", &self.tokens) 247 .finish() 248 } 249 } 250 251 impl<'a, T: PollToken> PollEvents<'a, T> { 252 /// Creates owned structure from borrowed [`PollEvents`](struct.PollEvents.html). 253 /// 254 /// Copies the events to an owned structure so the reference to this (and by extension 255 /// [`PollContext`](struct.PollContext.html)) can be dropped. 
to_owned(&self) -> PollEventsOwned<T>256 pub fn to_owned(&self) -> PollEventsOwned<T> { 257 PollEventsOwned { 258 count: self.count, 259 events: RefCell::new(*self.events), 260 tokens: PhantomData, 261 } 262 } 263 264 /// Iterates over each event. iter(&self) -> PollEventIter<'_, slice::Iter<'_, epoll_event>, T>265 pub fn iter(&self) -> PollEventIter<'_, slice::Iter<'_, epoll_event>, T> { 266 PollEventIter { 267 mask: 0xffff_ffff, 268 iter: self.events[..self.count].iter(), 269 tokens: PhantomData, 270 } 271 } 272 273 /// Iterates over each readable event. iter_readable(&self) -> PollEventIter<'_, slice::Iter<'_, epoll_event>, T>274 pub fn iter_readable(&self) -> PollEventIter<'_, slice::Iter<'_, epoll_event>, T> { 275 PollEventIter { 276 mask: EPOLLIN as u32, 277 iter: self.events[..self.count].iter(), 278 tokens: PhantomData, 279 } 280 } 281 282 /// Iterates over each hungup event. iter_hungup(&self) -> PollEventIter<'_, slice::Iter<'_, epoll_event>, T>283 pub fn iter_hungup(&self) -> PollEventIter<'_, slice::Iter<'_, epoll_event>, T> { 284 PollEventIter { 285 mask: EPOLLHUP as u32, 286 iter: self.events[..self.count].iter(), 287 tokens: PhantomData, 288 } 289 } 290 } 291 292 /// A deep copy of the event records from [`PollEvents`](struct.PollEvents.html). 293 pub struct PollEventsOwned<T> { 294 count: usize, 295 events: RefCell<[epoll_event; POLL_CONTEXT_MAX_EVENTS]>, 296 tokens: PhantomData<T>, // Needed to satisfy usage of T 297 } 298 299 impl<T> std::fmt::Debug for PollEventsOwned<T> { fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result300 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 301 f.debug_struct("PollEventsOwned") 302 .field("count", &self.count) 303 .field("events", &"?") 304 .field("tokens", &self.tokens) 305 .finish() 306 } 307 } 308 309 impl<T: PollToken> PollEventsOwned<T> { 310 /// Creates borrowed structure from owned structure 311 /// [`PollEventsOwned`](struct.PollEventsOwned.html). 
312 /// 313 /// Takes a reference to the events so it can be iterated via methods in 314 /// [`PollEvents`](struct.PollEvents.html). as_ref(&self) -> PollEvents<'_, T>315 pub fn as_ref(&self) -> PollEvents<'_, T> { 316 PollEvents { 317 count: self.count, 318 events: self.events.borrow(), 319 tokens: PhantomData, 320 } 321 } 322 } 323 324 /// Watching events taken by [`PollContext`](struct.PollContext.html). 325 #[derive(Debug, Copy, Clone)] 326 pub struct WatchingEvents(u32); 327 328 impl WatchingEvents { 329 /// Returns empty `WatchingEvents`. 330 #[inline(always)] empty() -> WatchingEvents331 pub fn empty() -> WatchingEvents { 332 WatchingEvents(0) 333 } 334 335 /// Creates a new `WatchingEvents` with a specified value. 336 /// 337 /// Builds `WatchingEvents` from raw `epoll_event`. 338 /// 339 /// # Arguments 340 /// 341 /// * `raw`: the events to be created for watching. 342 #[inline(always)] new(raw: u32) -> WatchingEvents343 pub fn new(raw: u32) -> WatchingEvents { 344 WatchingEvents(raw) 345 } 346 347 /// Sets read events. 348 /// 349 /// Sets the events to be readable. 350 #[inline(always)] set_read(self) -> WatchingEvents351 pub fn set_read(self) -> WatchingEvents { 352 WatchingEvents(self.0 | EPOLLIN as u32) 353 } 354 355 /// Sets write events. 356 /// 357 /// Sets the events to be writable. 358 #[inline(always)] set_write(self) -> WatchingEvents359 pub fn set_write(self) -> WatchingEvents { 360 WatchingEvents(self.0 | EPOLLOUT as u32) 361 } 362 363 /// Gets the underlying epoll events. get_raw(&self) -> u32364 pub fn get_raw(&self) -> u32 { 365 self.0 366 } 367 } 368 369 /// A wrapper of linux [`epoll`](http://man7.org/linux/man-pages/man7/epoll.7.html). 370 /// 371 /// It provides similar interface to [`PollContext`](struct.PollContext.html). 372 /// It is thread safe while PollContext is not. It requires user to pass in a reference of 373 /// EpollEvents while PollContext does not. 
Always use PollContext if you don't need to access the 374 /// same epoll from different threads. 375 /// 376 /// # Examples 377 /// 378 /// ``` 379 /// extern crate vmm_sys_util; 380 /// use vmm_sys_util::eventfd::EventFd; 381 /// use vmm_sys_util::poll::{EpollContext, EpollEvents}; 382 /// 383 /// let evt = EventFd::new(0).unwrap(); 384 /// let ctx: EpollContext<u32> = EpollContext::new().unwrap(); 385 /// let events = EpollEvents::new(); 386 /// 387 /// evt.write(1).unwrap(); 388 /// ctx.add(&evt, 1).unwrap(); 389 /// 390 /// for event in ctx.wait(&events).unwrap().iter_readable() { 391 /// assert_eq!(event.token(), 1); 392 /// } 393 /// ``` 394 #[derive(Debug)] 395 pub struct EpollContext<T> { 396 epoll_ctx: File, 397 // Needed to satisfy usage of T 398 tokens: PhantomData<[T]>, 399 } 400 401 impl<T: PollToken> EpollContext<T> { 402 /// Creates a new `EpollContext`. 403 /// 404 /// Uses [`epoll_create1`](http://man7.org/linux/man-pages/man2/epoll_create.2.html) 405 /// to create a new epoll fd. 406 /// 407 /// # Examples 408 /// 409 /// ``` 410 /// extern crate vmm_sys_util; 411 /// use vmm_sys_util::poll::EpollContext; 412 /// 413 /// let ctx: EpollContext<usize> = EpollContext::new().unwrap(); 414 /// ``` new() -> Result<EpollContext<T>>415 pub fn new() -> Result<EpollContext<T>> { 416 // SAFETY: Safe because we check the return value. 417 let epoll_fd = unsafe { epoll_create1(EPOLL_CLOEXEC) }; 418 if epoll_fd < 0 { 419 return errno_result(); 420 } 421 Ok(EpollContext { 422 // SAFETY: Safe because we verified that the FD is valid and we trust `epoll_create1`. 423 epoll_ctx: unsafe { File::from_raw_fd(epoll_fd) }, 424 tokens: PhantomData, 425 }) 426 } 427 428 /// Adds the given `fd` to this context and associates the given 429 /// `token` with the `fd`'s readable events. 430 /// 431 /// A `fd` can only be added once and does not need to be kept open. 432 /// If the `fd` is dropped and there were no duplicated file descriptors 433 /// (i.e. 
adding the same descriptor with a different FD number) added 434 /// to this context, events will not be reported by `wait` anymore. 435 /// 436 /// # Arguments 437 /// 438 /// * `fd`: the target file descriptor to be added. 439 /// * `token`: a `PollToken` implementation, used to be as u64 of `libc::epoll_event` structure. 440 /// 441 /// # Examples 442 /// 443 /// ``` 444 /// extern crate vmm_sys_util; 445 /// use vmm_sys_util::eventfd::EventFd; 446 /// use vmm_sys_util::poll::EpollContext; 447 /// 448 /// let evt = EventFd::new(0).unwrap(); 449 /// let ctx: EpollContext<u32> = EpollContext::new().unwrap(); 450 /// ctx.add(&evt, 1).unwrap(); 451 /// ``` add(&self, fd: &dyn AsRawFd, token: T) -> Result<()>452 pub fn add(&self, fd: &dyn AsRawFd, token: T) -> Result<()> { 453 self.add_fd_with_events(fd, WatchingEvents::empty().set_read(), token) 454 } 455 456 /// Adds the given `fd` to this context, watching for the specified `events` 457 /// and associates the given 'token' with those events. 458 /// 459 /// A `fd` can only be added once and does not need to be kept open. If the `fd` 460 /// is dropped and there were no duplicated file descriptors (i.e. adding the same 461 /// descriptor with a different FD number) added to this context, events will 462 /// not be reported by `wait` anymore. 463 /// 464 /// # Arguments 465 /// 466 /// * `fd`: the target file descriptor to be added. 467 /// * `events`: specifies the events to be watched. 468 /// * `token`: a `PollToken` implementation, used to be as u64 of `libc::epoll_event` structure. 
469 /// 470 /// # Examples 471 /// 472 /// ``` 473 /// extern crate vmm_sys_util; 474 /// use vmm_sys_util::eventfd::EventFd; 475 /// use vmm_sys_util::poll::{EpollContext, WatchingEvents}; 476 /// 477 /// let evt = EventFd::new(0).unwrap(); 478 /// let ctx: EpollContext<u32> = EpollContext::new().unwrap(); 479 /// ctx.add_fd_with_events(&evt, WatchingEvents::empty().set_read(), 1) 480 /// .unwrap(); 481 /// ``` add_fd_with_events( &self, fd: &dyn AsRawFd, events: WatchingEvents, token: T, ) -> Result<()>482 pub fn add_fd_with_events( 483 &self, 484 fd: &dyn AsRawFd, 485 events: WatchingEvents, 486 token: T, 487 ) -> Result<()> { 488 let mut evt = epoll_event { 489 events: events.get_raw(), 490 u64: token.as_raw_token(), 491 }; 492 // SAFETY: Safe because we give a valid epoll FD and FD to watch, as well as a 493 // valid epoll_event structure. Then we check the return value. 494 let ret = unsafe { 495 epoll_ctl( 496 self.epoll_ctx.as_raw_fd(), 497 EPOLL_CTL_ADD, 498 fd.as_raw_fd(), 499 &mut evt, 500 ) 501 }; 502 if ret < 0 { 503 return errno_result(); 504 }; 505 Ok(()) 506 } 507 508 /// Changes the setting associated with the given `fd` in this context. 509 /// 510 /// If `fd` was previously added to this context, the watched events will be replaced with 511 /// `events` and the token associated with it will be replaced with the given `token`. 512 /// 513 /// # Arguments 514 /// 515 /// * `fd`: the target file descriptor to be performed. 516 /// * `events`: specifies the events to be watched. 517 /// * `token`: a `PollToken` implementation, used to be as u64 of `libc::epoll_event` structure. 
518 /// 519 /// # Examples 520 /// 521 /// ``` 522 /// extern crate vmm_sys_util; 523 /// use vmm_sys_util::eventfd::EventFd; 524 /// use vmm_sys_util::poll::{EpollContext, WatchingEvents}; 525 /// 526 /// let evt = EventFd::new(0).unwrap(); 527 /// let ctx: EpollContext<u32> = EpollContext::new().unwrap(); 528 /// ctx.add_fd_with_events(&evt, WatchingEvents::empty().set_read(), 1) 529 /// .unwrap(); 530 /// ctx.modify(&evt, WatchingEvents::empty().set_write(), 2) 531 /// .unwrap(); 532 /// ``` modify(&self, fd: &dyn AsRawFd, events: WatchingEvents, token: T) -> Result<()>533 pub fn modify(&self, fd: &dyn AsRawFd, events: WatchingEvents, token: T) -> Result<()> { 534 let mut evt = epoll_event { 535 events: events.0, 536 u64: token.as_raw_token(), 537 }; 538 // SAFETY: Safe because we give a valid epoll FD and FD to modify, as well as a valid 539 // epoll_event structure. Then we check the return value. 540 let ret = unsafe { 541 epoll_ctl( 542 self.epoll_ctx.as_raw_fd(), 543 EPOLL_CTL_MOD, 544 fd.as_raw_fd(), 545 &mut evt, 546 ) 547 }; 548 if ret < 0 { 549 return errno_result(); 550 }; 551 Ok(()) 552 } 553 554 /// Deletes the given `fd` from this context. 555 /// 556 /// If an `fd`'s token shows up in the list of hangup events, it should be removed using this 557 /// method or by closing/dropping (if and only if the fd was never dup()'d/fork()'d) the `fd`. 558 /// Failure to do so will cause the `wait` method to always return immediately, causing ~100% 559 /// CPU load. 560 /// 561 /// # Arguments 562 /// 563 /// * `fd`: the target file descriptor to be removed. 
564 /// 565 /// # Examples 566 /// 567 /// ``` 568 /// extern crate vmm_sys_util; 569 /// use vmm_sys_util::eventfd::EventFd; 570 /// use vmm_sys_util::poll::EpollContext; 571 /// 572 /// let evt = EventFd::new(0).unwrap(); 573 /// let ctx: EpollContext<u32> = EpollContext::new().unwrap(); 574 /// ctx.add(&evt, 1).unwrap(); 575 /// ctx.delete(&evt).unwrap(); 576 /// ``` delete(&self, fd: &dyn AsRawFd) -> Result<()>577 pub fn delete(&self, fd: &dyn AsRawFd) -> Result<()> { 578 // SAFETY: Safe because we give a valid epoll FD and FD to stop watching. Then we check 579 // the return value. 580 let ret = unsafe { 581 epoll_ctl( 582 self.epoll_ctx.as_raw_fd(), 583 EPOLL_CTL_DEL, 584 fd.as_raw_fd(), 585 null_mut(), 586 ) 587 }; 588 if ret < 0 { 589 return errno_result(); 590 }; 591 Ok(()) 592 } 593 594 /// Waits for any events to occur in FDs that were previously added to this context. 595 /// 596 /// The events are level-triggered, meaning that if any events are unhandled (i.e. not reading 597 /// for readable events and not closing for hungup events), subsequent calls to `wait` will 598 /// return immediately. The consequence of not handling an event perpetually while calling 599 /// `wait` is that the callers loop will degenerated to busy loop polling, pinning a CPU to 600 /// ~100% usage. 601 /// 602 /// # Arguments 603 /// 604 /// * `events`: the events to wait for. 
605 /// 606 /// # Examples 607 /// 608 /// ``` 609 /// extern crate vmm_sys_util; 610 /// use vmm_sys_util::eventfd::EventFd; 611 /// use vmm_sys_util::poll::{EpollContext, EpollEvents}; 612 /// 613 /// let evt = EventFd::new(0).unwrap(); 614 /// let ctx: EpollContext<u32> = EpollContext::new().unwrap(); 615 /// let events = EpollEvents::new(); 616 /// 617 /// evt.write(1).unwrap(); 618 /// ctx.add(&evt, 1).unwrap(); 619 /// 620 /// for event in ctx.wait(&events).unwrap().iter_readable() { 621 /// assert_eq!(event.token(), 1); 622 /// } 623 /// ``` wait<'a>(&self, events: &'a EpollEvents) -> Result<PollEvents<'a, T>>624 pub fn wait<'a>(&self, events: &'a EpollEvents) -> Result<PollEvents<'a, T>> { 625 self.wait_timeout(events, Duration::new(i64::MAX as u64, 0)) 626 } 627 628 /// Like [`wait`](struct.EpollContext.html#method.wait) except will only block for a 629 /// maximum of the given `timeout`. 630 /// 631 /// This may return earlier than `timeout` with zero events if the duration indicated exceeds 632 /// system limits. 633 /// 634 /// # Arguments 635 /// 636 /// * `events`: the events to wait for. 637 /// * `timeout`: specifies the timeout that will block. 
638 /// 639 /// # Examples 640 /// 641 /// ``` 642 /// extern crate vmm_sys_util; 643 /// # use std::time::Duration; 644 /// use vmm_sys_util::eventfd::EventFd; 645 /// use vmm_sys_util::poll::{EpollContext, EpollEvents}; 646 /// 647 /// let evt = EventFd::new(0).unwrap(); 648 /// let ctx: EpollContext<u32> = EpollContext::new().unwrap(); 649 /// let events = EpollEvents::new(); 650 /// 651 /// evt.write(1).unwrap(); 652 /// ctx.add(&evt, 1).unwrap(); 653 /// for event in ctx 654 /// .wait_timeout(&events, Duration::new(100, 0)) 655 /// .unwrap() 656 /// .iter_readable() 657 /// { 658 /// assert_eq!(event.token(), 1); 659 /// } 660 /// ``` wait_timeout<'a>( &self, events: &'a EpollEvents, timeout: Duration, ) -> Result<PollEvents<'a, T>>661 pub fn wait_timeout<'a>( 662 &self, 663 events: &'a EpollEvents, 664 timeout: Duration, 665 ) -> Result<PollEvents<'a, T>> { 666 let timeout_millis = if timeout.as_secs() as i64 == i64::max_value() { 667 // We make the convenient assumption that 2^63 seconds is an effectively unbounded time 668 // frame. This is meant to mesh with `wait` calling us with no timeout. 669 -1 670 } else { 671 // In cases where we the number of milliseconds would overflow an i32, we substitute the 672 // maximum timeout which is ~24.8 days. 673 let millis = timeout 674 .as_secs() 675 .checked_mul(1_000) 676 .and_then(|ms| ms.checked_add(u64::from(timeout.subsec_nanos()) / 1_000_000)) 677 .unwrap_or(i32::max_value() as u64); 678 min(i32::max_value() as u64, millis) as i32 679 }; 680 let ret = { 681 let mut epoll_events = events.0.borrow_mut(); 682 let max_events = epoll_events.len() as c_int; 683 // SAFETY: Safe because we give an epoll context and a properly sized epoll_events 684 // array pointer, which we trust the kernel to fill in properly. 
685 unsafe { 686 handle_eintr_errno!(epoll_wait( 687 self.epoll_ctx.as_raw_fd(), 688 &mut epoll_events[0], 689 max_events, 690 timeout_millis 691 )) 692 } 693 }; 694 if ret < 0 { 695 return errno_result(); 696 } 697 let epoll_events = events.0.borrow(); 698 let events = PollEvents { 699 count: ret as usize, 700 events: epoll_events, 701 tokens: PhantomData, 702 }; 703 Ok(events) 704 } 705 } 706 707 impl<T: PollToken> AsRawFd for EpollContext<T> { as_raw_fd(&self) -> RawFd708 fn as_raw_fd(&self) -> RawFd { 709 self.epoll_ctx.as_raw_fd() 710 } 711 } 712 713 impl<T: PollToken> IntoRawFd for EpollContext<T> { into_raw_fd(self) -> RawFd714 fn into_raw_fd(self) -> RawFd { 715 self.epoll_ctx.into_raw_fd() 716 } 717 } 718 719 /// Used to poll multiple objects that have file descriptors. 720 /// 721 /// # Example 722 /// 723 /// ``` 724 /// # use vmm_sys_util::errno::Result; 725 /// # use vmm_sys_util::eventfd::EventFd; 726 /// # use vmm_sys_util::poll::{PollContext, PollEvents}; 727 /// let evt1 = EventFd::new(0).unwrap(); 728 /// let evt2 = EventFd::new(0).unwrap(); 729 /// evt2.write(1).unwrap(); 730 /// 731 /// let ctx: PollContext<u32> = PollContext::new().unwrap(); 732 /// ctx.add(&evt1, 1).unwrap(); 733 /// ctx.add(&evt2, 2).unwrap(); 734 /// 735 /// let pollevents: PollEvents<u32> = ctx.wait().unwrap(); 736 /// let tokens: Vec<u32> = pollevents.iter_readable().map(|e| e.token()).collect(); 737 /// assert_eq!(&tokens[..], &[2]); 738 /// ``` 739 #[derive(Debug)] 740 pub struct PollContext<T> { 741 epoll_ctx: EpollContext<T>, 742 743 // We use a RefCell here so that the `wait` method only requires an immutable self reference 744 // while returning the events (encapsulated by PollEvents). Without the RefCell, `wait` would 745 // hold a mutable reference that lives as long as its returned reference (i.e. the PollEvents), 746 // even though that reference is immutable. 
This is terribly inconvenient for the caller because 747 // the borrow checking would prevent them from using `delete` and `add` while the events are in 748 // scope. 749 events: EpollEvents, 750 751 // Hangup busy loop detection variables. See `check_for_hungup_busy_loop`. 752 check_for_hangup: bool, 753 hangups: Cell<usize>, 754 max_hangups: Cell<usize>, 755 } 756 757 impl<T: PollToken> PollContext<T> { 758 /// Creates a new `PollContext`. new() -> Result<PollContext<T>>759 pub fn new() -> Result<PollContext<T>> { 760 Ok(PollContext { 761 epoll_ctx: EpollContext::new()?, 762 events: EpollEvents::new(), 763 check_for_hangup: true, 764 hangups: Cell::new(0), 765 max_hangups: Cell::new(0), 766 }) 767 } 768 769 /// Enable/disable of checking for unhandled hangup events. set_check_for_hangup(&mut self, enable: bool)770 pub fn set_check_for_hangup(&mut self, enable: bool) { 771 self.check_for_hangup = enable; 772 } 773 774 /// Adds the given `fd` to this context and associates the given `token` with the `fd`'s 775 /// readable events. 776 /// 777 /// A `fd` can only be added once and does not need to be kept open. If the `fd` is dropped and 778 /// there were no duplicated file descriptors (i.e. adding the same descriptor with a different 779 /// FD number) added to this context, events will not be reported by `wait` anymore. 780 /// 781 /// # Arguments 782 /// 783 /// * `fd`: the target file descriptor to be added. 784 /// * `token`: a `PollToken` implementation, used to be as u64 of `libc::epoll_event` structure. add(&self, fd: &dyn AsRawFd, token: T) -> Result<()>785 pub fn add(&self, fd: &dyn AsRawFd, token: T) -> Result<()> { 786 self.add_fd_with_events(fd, WatchingEvents::empty().set_read(), token) 787 } 788 789 /// Adds the given `fd` to this context, watching for the specified events and associates the 790 /// given 'token' with those events. 791 /// 792 /// A `fd` can only be added once and does not need to be kept open. 
If the `fd` is dropped and 793 /// there were no duplicated file descriptors (i.e. adding the same descriptor with a different 794 /// FD number) added to this context, events will not be reported by `wait` anymore. 795 /// 796 /// # Arguments 797 /// 798 /// * `fd`: the target file descriptor to be added. 799 /// * `events`: specifies the events to be watched. 800 /// * `token`: a `PollToken` implementation, used to be as u64 of `libc::epoll_event` structure. add_fd_with_events( &self, fd: &dyn AsRawFd, events: WatchingEvents, token: T, ) -> Result<()>801 pub fn add_fd_with_events( 802 &self, 803 fd: &dyn AsRawFd, 804 events: WatchingEvents, 805 token: T, 806 ) -> Result<()> { 807 self.epoll_ctx.add_fd_with_events(fd, events, token)?; 808 self.hangups.set(0); 809 self.max_hangups.set(self.max_hangups.get() + 1); 810 Ok(()) 811 } 812 813 /// Changes the setting associated with the given `fd` in this context. 814 /// 815 /// If `fd` was previously added to this context, the watched events will be replaced with 816 /// `events` and the token associated with it will be replaced with the given `token`. 817 /// 818 /// # Arguments 819 /// 820 /// * `fd`: the target file descriptor to be modified. 821 /// * `events`: specifies the events to be watched. 822 /// * `token`: a `PollToken` implementation, used to be as u64 of `libc::epoll_event` structure. modify(&self, fd: &dyn AsRawFd, events: WatchingEvents, token: T) -> Result<()>823 pub fn modify(&self, fd: &dyn AsRawFd, events: WatchingEvents, token: T) -> Result<()> { 824 self.epoll_ctx.modify(fd, events, token) 825 } 826 827 /// Deletes the given `fd` from this context. 828 /// 829 /// If an `fd`'s token shows up in the list of hangup events, it should be removed using this 830 /// method or by closing/dropping (if and only if the fd was never dup()'d/fork()'d) the `fd`. 831 /// Failure to do so will cause the `wait` method to always return immediately, causing ~100% 832 /// CPU load. 
833 /// 834 /// # Arguments 835 /// 836 /// * `fd`: the target file descriptor to be removed. delete(&self, fd: &dyn AsRawFd) -> Result<()>837 pub fn delete(&self, fd: &dyn AsRawFd) -> Result<()> { 838 self.epoll_ctx.delete(fd)?; 839 self.hangups.set(0); 840 self.max_hangups.set(self.max_hangups.get() - 1); 841 Ok(()) 842 } 843 844 // This method determines if the the user of wait is misusing the `PollContext` by leaving FDs 845 // in this `PollContext` that have been shutdown or hungup on. Such an FD will cause `wait` to 846 // return instantly with a hungup event. If that FD is perpetually left in this context, a busy 847 // loop burning ~100% of one CPU will silently occur with no human visible malfunction. 848 // 849 // How do we know if the client of this context is ignoring hangups? A naive implementation 850 // would trigger if consecutive wait calls yield hangup events, but there are legitimate cases 851 // for this, such as two distinct sockets becoming hungup across two consecutive wait calls. A 852 // smarter implementation would only trigger if `delete` wasn't called between waits that 853 // yielded hangups. Sadly `delete` isn't the only way to remove an FD from this context. The 854 // other way is for the client to close the hungup FD, which automatically removes it from this 855 // context. Assuming that the client always uses close, this implementation would too eagerly 856 // trigger. 857 // 858 // The implementation used here keeps an upper bound of FDs in this context using a counter 859 // hooked into add/delete (which is imprecise because close can also remove FDs without us 860 // knowing). The number of consecutive (no add or delete in between) hangups yielded by wait 861 // calls is counted and compared to the upper bound. If the upper bound is exceeded by the 862 // consecutive hangups, the implementation triggers the check and logs. 
863 // 864 // This implementation has false negatives because the upper bound can be completely too high, 865 // in the worst case caused by only using close instead of delete. However, this method has the 866 // advantage of always triggering eventually genuine busy loop cases, requires no dynamic 867 // allocations, is fast and constant time to compute, and has no false positives. check_for_hungup_busy_loop(&self, new_hangups: usize)868 fn check_for_hungup_busy_loop(&self, new_hangups: usize) { 869 let old_hangups = self.hangups.get(); 870 let max_hangups = self.max_hangups.get(); 871 if old_hangups <= max_hangups && old_hangups + new_hangups > max_hangups { 872 let mut buf = [0u8; 512]; 873 let (res, len) = { 874 let mut buf_cursor = Cursor::new(&mut buf[..]); 875 // Oops, clippy bug. See https://github.com/rust-lang/rust-clippy/issues/9810 876 #[allow(clippy::write_literal)] 877 ( 878 writeln!( 879 &mut buf_cursor, 880 "[{}:{}] busy poll wait loop with hungup FDs detected on thread {}\n", 881 file!(), 882 line!(), 883 thread::current().name().unwrap_or("") 884 ), 885 buf_cursor.position() as usize, 886 ) 887 }; 888 889 if res.is_ok() { 890 let _ = stderr().write_all(&buf[..len]); 891 } 892 // This panic is helpful for tests of this functionality. 893 #[cfg(test)] 894 panic!("hungup busy loop detected"); 895 } 896 self.hangups.set(old_hangups + new_hangups); 897 } 898 899 /// Waits for any events to occur in FDs that were previously added to this context. 900 /// 901 /// The events are level-triggered, meaning that if any events are unhandled (i.e. not reading 902 /// for readable events and not closing for hungup events), subsequent calls to `wait` will 903 /// return immediately. The consequence of not handling an event perpetually while calling 904 /// `wait` is that the callers loop will degenerated to busy loop polling, pinning a CPU to 905 /// ~100% usage. 
    ///
    /// # Panics
    /// Panics if the returned `PollEvents` structure is not dropped before subsequent `wait` calls.
    pub fn wait(&self) -> Result<PollEvents<'_, T>> {
        // An effectively infinite timeout; `wait_timeout` is responsible for
        // clamping it to whatever the underlying system call can express.
        self.wait_timeout(Duration::new(i64::MAX as u64, 0))
    }

    /// Like [`wait`](struct.EpollContext.html#method.wait) except will only block for a
    /// maximum of the given `timeout`.
    ///
    /// This may return earlier than `timeout` with zero events if the duration indicated exceeds
    /// system limits.
    ///
    /// # Arguments
    ///
    /// * `timeout`: specify the time that will block.
    pub fn wait_timeout(&self, timeout: Duration) -> Result<PollEvents<'_, T>> {
        let events = self.epoll_ctx.wait_timeout(&self.events, timeout)?;
        // Count the hangup events in this batch so the busy-loop heuristic can
        // flag callers that never remove hungup fds from the context.
        let hangups = events.iter_hungup().count();
        if self.check_for_hangup {
            self.check_for_hungup_busy_loop(hangups);
        }
        Ok(events)
    }
}

// Expose the inner epoll fd (borrowed) so the context itself can be polled.
impl<T: PollToken> AsRawFd for PollContext<T> {
    fn as_raw_fd(&self) -> RawFd {
        self.epoll_ctx.as_raw_fd()
    }
}

// Consume the context and hand ownership of the epoll fd to the caller.
impl<T: PollToken> IntoRawFd for PollContext<T> {
    fn into_raw_fd(self) -> RawFd {
        self.epoll_ctx.into_raw_fd()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::eventfd::EventFd;
    use std::os::unix::net::UnixStream;
    use std::time::Instant;

    // Basic add/wait/delete round trip: two signaled eventfds must each be
    // reported exactly once as readable.
    #[test]
    fn test_poll_context() {
        let evt1 = EventFd::new(0).unwrap();
        let evt2 = EventFd::new(0).unwrap();
        evt1.write(1).unwrap();
        evt2.write(1).unwrap();
        let ctx: PollContext<u32> = PollContext::new().unwrap();
        ctx.add(&evt1, 1).unwrap();
        ctx.add(&evt2, 2).unwrap();

        let mut evt_count = 0;
        while evt_count < 2 {
            for event in ctx.wait().unwrap().iter_readable() {
                evt_count += 1;
                match event.token() {
                    1 => {
                        evt1.read().unwrap();
                        ctx.delete(&evt1).unwrap();
                    }
                    2 => {
                        evt2.read().unwrap();
                        ctx.delete(&evt2).unwrap();
                    }
                    _ => panic!("unexpected token"),
                };
            }
        }
        assert_eq!(evt_count, 2);
    }

    // Adding more fds than POLL_CONTEXT_MAX_EVENTS still delivers every event,
    // just spread over multiple `wait` calls.
    #[test]
    fn test_poll_context_overflow() {
        const EVT_COUNT: usize = POLL_CONTEXT_MAX_EVENTS * 2 + 1;
        let ctx: PollContext<usize> = PollContext::new().unwrap();
        let mut evts = Vec::with_capacity(EVT_COUNT);
        for i in 0..EVT_COUNT {
            let evt = EventFd::new(0).unwrap();
            evt.write(1).unwrap();
            ctx.add(&evt, i).unwrap();
            evts.push(evt);
        }
        let mut evt_count = 0;
        while evt_count < EVT_COUNT {
            for event in ctx.wait().unwrap().iter_readable() {
                evts[event.token()].read().unwrap();
                evt_count += 1;
            }
        }
    }

    // The hangup busy-loop detector panics (under cfg(test)) when hangup
    // events are repeatedly ignored.
    #[test]
    #[should_panic]
    fn test_poll_context_hungup() {
        let (s1, s2) = UnixStream::pair().unwrap();
        let ctx: PollContext<u32> = PollContext::new().unwrap();
        ctx.add(&s1, 1).unwrap();

        // Causes s1 to receive hangup events, which we purposefully ignore to trip the detection
        // logic in `PollContext`.
        drop(s2);

        // Should easily panic within this many iterations.
        for _ in 0..1000 {
            ctx.wait().unwrap();
        }
    }

    // `wait_timeout` must block for at least the requested duration when no
    // events arrive; hangup checking is disabled since nothing is registered.
    #[test]
    fn test_poll_context_timeout() {
        let mut ctx: PollContext<u32> = PollContext::new().unwrap();
        let dur = Duration::from_millis(10);
        let start_inst = Instant::now();

        ctx.set_check_for_hangup(false);
        ctx.wait_timeout(dur).unwrap();
        assert!(start_inst.elapsed() >= dur);
    }

    // Accessor sanity check: a raw epoll_event with all flag bits set must be
    // reflected by every PollEvent predicate and by `raw_events`/`token`.
    #[test]
    fn test_poll_event() {
        let event = epoll_event {
            events: (EPOLLIN | EPOLLERR | EPOLLOUT | EPOLLHUP) as u32,
            u64: 0x10,
        };
        let ev = PollEvent::<u32> {
            event: &event,
            token: PhantomData,
        };

        assert_eq!(ev.token(), 0x10);
        assert!(ev.readable());
        assert!(ev.writable());
        assert!(ev.hungup());
        assert!(ev.has_error());
        assert_eq!(
            ev.raw_events(),
            (EPOLLIN | EPOLLERR | EPOLLOUT | EPOLLHUP) as u32
        );
    }
}