// Copyright 2022 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#![deny(missing_docs)]

use std::fs::File;
use std::ops::Range;
use std::os::unix::fs::FileExt;

use base::error;
use base::linux::MemoryMappingUnix;
use base::MemoryMapping;
use base::MemoryMappingBuilder;
use base::MmapError;
use base::Protection;
use base::VolatileMemory;
use base::VolatileMemoryError;
use base::VolatileSlice;
use thiserror::Error as ThisError;

use crate::pagesize::bytes_to_pages;
use crate::pagesize::is_page_aligned;
use crate::pagesize::pages_to_bytes;

pub type Result<T> = std::result::Result<T, Error>;

// On a system with 4KB pages, this limits guest memory to less than 8 TiB, which is a reasonable
// assumption.
const MAX_PAGE_IDX: usize = (1 << 31) - 2;

#[derive(ThisError, Debug)]
pub enum Error {
    #[error("io failed: {0}")]
    Io(#[from] std::io::Error),
    #[error("mmap operation failed ({0}): {1}")]
    Mmap(&'static str, MmapError),
    #[error("volatile memory operation failed: {0}")]
    VolatileMemory(#[from] VolatileMemoryError),
    #[error("index is out of range")]
    OutOfRange,
    #[error("data size is invalid")]
    InvalidSize,
    #[error("index is invalid")]
    InvalidIndex,
}

/// u32 packing the state of a page in the file.
///
/// * MSB: Whether the page in the file is freed. (1: freed, 0: allocated)
/// * lower 31 bits:
///   * The corresponding page index if the file page is allocated.
///   * The file page index + 1 of the next freed file page if the file page is freed. Zero means
///     it is the last page in the free list.
#[derive(Debug)]
struct FilePageState(u32);

impl FilePageState {
    const FREED_BIT_MASK: u32 = 1 << 31;

    fn freed_state(first_freed_page: Option<usize>) -> Self {
        Self(
            Self::FREED_BIT_MASK
                | first_freed_page
                    .map(|idx_file| idx_file as u32 + 1)
                    .unwrap_or(0),
        )
    }

    fn allocated_state(idx_page: usize) -> Option<Self> {
        if idx_page <= MAX_PAGE_IDX {
            Some(Self(idx_page as u32))
        } else {
            // idx_page is invalid.
            None
        }
    }

    fn is_freed(&self) -> bool {
        self.0 & Self::FREED_BIT_MASK != 0
    }

    /// This is valid only if the page is freed.
    fn next_file_freed_idx(&self) -> Option<Option<usize>> {
        if self.is_freed() {
            let next_idx_file = !Self::FREED_BIT_MASK & self.0;
            if next_idx_file == 0 {
                Some(None)
            } else {
                Some(Some(next_idx_file as usize - 1))
            }
        } else {
            None
        }
    }

    /// This is valid only if the page is allocated.
    fn idx_page(&self) -> Option<usize> {
        if self.is_freed() {
            // The file page is freed.
            None
        } else {
            Some(self.0 as usize)
        }
    }
}
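// A minimal sketch verifying the bit packing described above. The exact values are implied by
// `FREED_BIT_MASK == 1 << 31`; this test is an illustrative addition, not part of the original
// test suite.
#[cfg(test)]
mod file_page_state_encoding_tests {
    use super::*;

    #[test]
    fn encoding_examples() {
        // An allocated file page stores the guest page index directly.
        assert_eq!(FilePageState::allocated_state(4).unwrap().0, 0x0000_0004);
        // A freed file page stores (next free file page index + 1) with the MSB set.
        let freed = FilePageState::freed_state(Some(4));
        assert_eq!(freed.0, 0x8000_0005);
        assert_eq!(freed.next_file_freed_idx(), Some(Some(4)));
        // Zero in the lower bits marks the end of the free list.
        let last = FilePageState::freed_state(None);
        assert_eq!(last.0, 0x8000_0000);
        assert_eq!(last.next_file_freed_idx(), Some(None));
    }
}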
#[derive(Debug)]
struct FilePageStates {
    /// Freed pages in the swap file are managed in a free list. `first_idx_file_freed` points to
    /// the first page index in the list.
    first_idx_file_freed: Option<usize>,
    states: Vec<FilePageState>,
}

impl FilePageStates {
    fn new(capacity: usize) -> Self {
        FilePageStates {
            first_idx_file_freed: None,
            states: Vec::with_capacity(capacity),
        }
    }

    fn len(&self) -> usize {
        self.states.len()
    }

    /// Frees a page in the swap file.
    fn free(&mut self, idx_file: usize) {
        self.states[idx_file] = FilePageState::freed_state(self.first_idx_file_freed);
        self.first_idx_file_freed = Some(idx_file);
    }

    /// Allocates a file page in the swap file.
    ///
    /// This returns the index of the allocated file page.
    ///
    /// This reuses freed file pages first. If the free list is empty, this allocates new pages in
    /// the file.
    fn allocate(&mut self, idx_page: usize) -> usize {
        if let Some(idx_file_freed) = self.first_idx_file_freed {
            // TODO(kawasin): Collect consecutive freed pages in the free list to reduce the
            // number of writes.
            let Some(next_idx_file_freed) = self.states[idx_file_freed].next_file_freed_idx()
            else {
                unreachable!("pages in free list must be freed pages")
            };
            let Some(state) = FilePageState::allocated_state(idx_page) else {
                unreachable!("idx_page must be less than MAX_PAGE_IDX");
            };
            self.states[idx_file_freed] = state;
            self.first_idx_file_freed = next_idx_file_freed;

            idx_file_freed
        } else {
            // The free list is empty. Allocate new pages.
            let head_idx_file = self.states.len();
            let Some(state) = FilePageState::allocated_state(idx_page) else {
                unreachable!("idx must be less than MAX_PAGE_IDX");
            };
            self.states.push(state);
            head_idx_file
        }
    }
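    // Free-list behavior sketch (an illustrative walk-through, not exercised directly anywhere):
    // starting from an empty `FilePageStates`, `allocate(10)` and `allocate(11)` grow the file
    // and return file pages 0 and 1. `free(0)` pushes file page 0 onto the free list, so the
    // next `allocate(12)` returns file page 0 again instead of growing the file.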
    /// Finds the index range of file pages that are all present.
    ///
    /// This returns a pair of the range of file page indexes and the page index corresponding to
    /// the first file page.
    ///
    /// Returns `None` if no pages at or after `idx_file` are present.
    ///
    /// # Arguments
    ///
    /// * `idx_file` - The first index to start searching from.
    /// * `page_states` - The page states.
    /// * `max_pages` - The maximum number of pages to search.
    /// * `consecutive` - If true, the pages must have consecutive `idx_page` values.
    fn find_present_pages_range(
        &self,
        idx_file: usize,
        page_states: &[PageState],
        max_pages: usize,
        consecutive: bool,
    ) -> Option<(Range<usize>, usize)> {
        let next_head_idx_offset = self.states[idx_file..].iter().position(|state| {
            !state.is_freed()
                && page_states[state
                    .idx_page()
                    .unwrap_or_else(|| unreachable!("the page is not freed"))]
                .is_present()
        })?;
        let idx_file = idx_file + next_head_idx_offset;

        let Some(head_idx_page) = self.states[idx_file].idx_page() else {
            unreachable!("the file page must not be freed");
        };

        let mut pages = 1;

        if max_pages > 1 {
            for state in self.states[idx_file + 1..].iter() {
                if state.is_freed() {
                    break;
                } else {
                    let Some(idx_page) = state.idx_page() else {
                        unreachable!("allocated page must have idx_page");
                    };
                    if !page_states[idx_page].is_present()
                        || (consecutive && idx_page != head_idx_page + pages)
                    {
                        break;
                    }
                }

                pages += 1;
                if pages >= max_pages {
                    break;
                }
            }
        }

        Some((idx_file..idx_file + pages, head_idx_page))
    }
}

/// u32 packing the state of a guest memory page.
///
/// * If the page is not in the swap file, the value is zero.
/// * MSB: Whether the page is stale or not. (0: stale, 1: present)
/// * lower 31 bits: The corresponding file page index + 1. Never zero.
#[derive(Clone, Debug)]
struct PageState(u32);

impl PageState {
    const IDX_FILE_MASK: u32 = (1 << 31) - 1;
    const PRESENT_BIT_MASK: u32 = 1 << 31;

    fn is_none(&self) -> bool {
        self.0 == 0
    }

    fn idx_file(&self) -> Option<usize> {
        if self.0 != 0 {
            Some((self.0 & Self::IDX_FILE_MASK) as usize - 1)
        } else {
            None
        }
    }

    fn is_present(&self) -> bool {
        self.0 & Self::PRESENT_BIT_MASK != 0
    }

    fn update(&mut self, idx_file: usize) {
        self.0 = (idx_file as u32 + 1) | Self::PRESENT_BIT_MASK;
    }

    fn mark_as_present(&mut self) {
        self.0 |= Self::PRESENT_BIT_MASK;
    }

    fn clear(&mut self) {
        self.0 &= !Self::PRESENT_BIT_MASK;
    }

    fn free(&mut self) {
        self.0 = 0;
    }
}
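// A minimal sketch verifying the PageState packing described above; an illustrative addition,
// not part of the original test suite.
#[cfg(test)]
mod page_state_encoding_tests {
    use super::*;

    #[test]
    fn encoding_examples() {
        let mut state = PageState(0);
        assert!(state.is_none());
        // Present and backed by file page 4: (4 + 1) with the MSB set.
        state.update(4);
        assert_eq!(state.0, 0x8000_0005);
        assert_eq!(state.idx_file(), Some(4));
        // clear() drops only the present bit; the file page association survives.
        state.clear();
        assert!(!state.is_present());
        assert_eq!(state.idx_file(), Some(4));
        // free() resets the state to "not in the swap file".
        state.free();
        assert!(state.is_none());
    }
}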
/// [SwapFile] stores active pages in a memory region.
///
/// This shares the swap file with other regions and creates an mmap of the corresponding range
/// in the file.
///
/// TODO(kawasin): The file structure is straightforward and is not optimized yet.
/// Each page in the file corresponds to the page in the memory region.
#[derive(Debug)]
pub struct SwapFile<'a> {
    file: &'a File,
    file_mmap: MemoryMapping,
    page_states: Vec<PageState>,
    file_states: FilePageStates,
    // All the data pages before this index are mlock(2)ed.
    cursor_mlock: usize,
    min_possible_present_idx_file: usize,
}

impl<'a> SwapFile<'a> {
    /// Creates an initialized [SwapFile] for a memory region.
    ///
    /// All pages are marked as empty initially.
    ///
    /// # Arguments
    ///
    /// * `file` - The swap file.
    /// * `num_of_pages` - The number of pages in the region.
    pub fn new(file: &'a File, num_of_pages: usize) -> Result<Self> {
        if num_of_pages > MAX_PAGE_IDX {
            return Err(Error::InvalidSize);
        }
        let file_mmap = MemoryMappingBuilder::new(pages_to_bytes(num_of_pages))
            .from_file(file)
            .protection(Protection::read())
            .build()
            .map_err(|e| Error::Mmap("create", e))?;
        Ok(Self {
            file,
            file_mmap,
            page_states: vec![PageState(0); num_of_pages],
            file_states: FilePageStates::new(num_of_pages),
            cursor_mlock: 0,
            min_possible_present_idx_file: 0,
        })
    }

    /// Returns the content of the page corresponding to the index if it is present.
    ///
    /// Returns [Option::None] if there is no content in the file.
    ///
    /// Returns [Error::OutOfRange] if `idx_page` is out of range.
    ///
    /// # Arguments
    ///
    /// * `idx_page` - the index of the page from the head of the pages.
    /// * `allow_cleared` - whether to return the content of a cleared (stale) page as well.
    pub fn page_content(
        &self,
        idx_page: usize,
        allow_cleared: bool,
    ) -> Result<Option<VolatileSlice>> {
        let state = self.page_states.get(idx_page).ok_or(Error::OutOfRange)?;
        if !state.is_none() && (allow_cleared || state.is_present()) {
            let Some(idx_file) = state.idx_file() else {
                unreachable!("the page is not none");
            };
            return match self
                .file_mmap
                .get_slice(pages_to_bytes(idx_file), pages_to_bytes(1))
            {
                Ok(slice) => Ok(Some(slice)),
                Err(VolatileMemoryError::OutOfBounds { .. }) => Err(Error::OutOfRange),
                Err(e) => Err(e.into()),
            };
        }
        Ok(None)
    }
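    // Typical call sequence, sketched for orientation (the exact caller lives outside this file,
    // so treat this as an assumption): write_to_file() swaps a page out; page_content() reads it
    // back on a page fault; clear_range() marks pages whose file contents should be kept for
    // reuse; mark_as_present() reuses those contents without another write; free_range() drops
    // pages entirely.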
    /// Starts reading ahead the swap file into the page cache from the head.
    ///
    /// This also `mlock2(2)`s the pages so that they are not dropped again after being populated.
    /// This does not block the caller thread on I/O wait because:
    ///
    /// * `mlock2(2)` is executed with `MLOCK_ONFAULT`.
    /// * `MADV_WILLNEED` is the same as `readahead(2)`, which triggers the readahead in the
    ///   background.
    /// * However, Linux has a bug that `readahead(2)` (and also `MADV_WILLNEED`) may block due to
    ///   reading the filesystem metadata.
    ///
    /// This returns the number of consecutive pages which are newly mlock(2)ed. Returning `0`
    /// means that there is no more data to be mlock(2)ed in this file.
    ///
    /// The caller must track the number of pages mlock(2)ed so as not to mlock(2) more pages than
    /// `RLIMIT_MEMLOCK` if it does not have `CAP_IPC_LOCK`; see the sketch after this method.
    ///
    /// # Arguments
    ///
    /// * `max_pages` - The maximum number of pages to be mlock(2)ed at once.
    pub fn lock_and_async_prefetch(&mut self, max_pages: usize) -> Result<usize> {
        if let Some((idx_file_range, _)) = self.file_states.find_present_pages_range(
            self.cursor_mlock,
            &self.page_states,
            max_pages,
            false,
        ) {
            let pages = idx_file_range.end - idx_file_range.start;
            let mem_offset = pages_to_bytes(idx_file_range.start);
            let size_in_bytes = pages_to_bytes(pages);
            self.file_mmap
                .lock_on_fault(mem_offset, size_in_bytes)
                .map_err(|e| Error::Mmap("mlock", e))?;
            self.file_mmap
                .async_prefetch(mem_offset, size_in_bytes)
                .map_err(|e| Error::Mmap("madvise willneed", e))?;
            self.cursor_mlock = idx_file_range.end;
            Ok(pages)
        } else {
            self.cursor_mlock = self.file_states.len();
            Ok(0)
        }
    }
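    // Caller-side accounting sketch (hypothetical; `budget` and `CHUNK` are illustrative names,
    // not part of this API): keep prefetching while staying under the remaining mlock budget.
    //
    //   while budget > 0 {
    //       let locked = swap_file.lock_and_async_prefetch(budget.min(CHUNK))?;
    //       if locked == 0 {
    //           break;
    //       }
    //       budget -= locked;
    //   }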
    /// Marks the pages in the file corresponding to the index as cleared.
    ///
    /// The contents in the swap file are preserved and will be reused by
    /// `SwapFile::mark_as_present()`, reducing disk I/O.
    ///
    /// If the pages are mlock(2)ed, this unlocks them before MADV_DONTNEED and returns the number
    /// of pages munlock(2)ed.
    ///
    /// # Arguments
    ///
    /// * `idx_page_range` - The indices of consecutive pages to be cleared. All the pages must be
    ///   present and consecutive in the compacted file.
    pub fn clear_range(&mut self, idx_page_range: Range<usize>) -> Result<usize> {
        let idx_file_range = self.convert_idx_page_range_to_idx_file(idx_page_range.clone())?;

        for state in &mut self.page_states[idx_page_range] {
            state.clear();
        }

        let offset = pages_to_bytes(idx_file_range.start);
        let munlocked_size = if idx_file_range.start < self.cursor_mlock {
            // idx_file_range was validated by convert_idx_page_range_to_idx_file() and
            // self.cursor_mlock is within the mmap.
            let pages = idx_file_range.end.min(self.cursor_mlock) - idx_file_range.start;
            // munlock(2) first because MADV_DONTNEED fails for mlock(2)ed pages.
            self.file_mmap
                .unlock(offset, pages_to_bytes(pages))
                .map_err(|e| Error::Mmap("munlock", e))?;
            pages
        } else {
            0
        };
        // offset and size were validated by convert_idx_page_range_to_idx_file().
        let size = pages_to_bytes(idx_file_range.end - idx_file_range.start);
        // The page cache is cleared without writing pages back to the file even if they are
        // dirty. The disk contents, which may not be the latest, are kept for the later trim
        // optimization.
        self.file_mmap
            .drop_page_cache(offset, size)
            .map_err(|e| Error::Mmap("madvise dontneed", e))?;
        Ok(munlocked_size)
    }

    /// Frees the pages corresponding to the given range in the file.
    ///
    /// If the pages are mlock(2)ed, this unlocks them and returns the number of pages
    /// munlock(2)ed.
    ///
    /// # Arguments
    ///
    /// * `idx_page_range` - The indices of consecutive pages to be freed. This may contain
    ///   non-present pages.
    pub fn free_range(&mut self, idx_page_range: Range<usize>) -> Result<usize> {
        if idx_page_range.end > self.page_states.len() {
            return Err(Error::OutOfRange);
        }
        let mut mlocked_pages = 0;
        let mut mlock_range: Option<Range<usize>> = None;
        for state in &mut self.page_states[idx_page_range] {
            if !state.is_none() {
                let Some(idx_file) = state.idx_file() else {
                    unreachable!("the page is not none.");
                };
                self.file_states.free(idx_file);

                if idx_file < self.cursor_mlock && state.is_present() {
                    mlocked_pages += 1;
                    if let Some(range) = mlock_range.as_mut() {
                        if idx_file + 1 == range.start {
                            range.start = idx_file;
                        } else if idx_file == range.end {
                            range.end += 1;
                        } else {
                            self.file_mmap
                                .unlock(
                                    pages_to_bytes(range.start),
                                    pages_to_bytes(range.end - range.start),
                                )
                                .map_err(|e| Error::Mmap("munlock", e))?;
                            mlock_range = Some(idx_file..idx_file + 1);
                        }
                    } else {
                        mlock_range = Some(idx_file..idx_file + 1);
                    }
                }
            }
            state.free();
        }
        if let Some(mlock_range) = mlock_range {
            self.file_mmap
                .unlock(
                    pages_to_bytes(mlock_range.start),
                    pages_to_bytes(mlock_range.end - mlock_range.start),
                )
                .map_err(|e| Error::Mmap("munlock", e))?;
        }

        Ok(mlocked_pages)
    }
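    // Worked example of the munlock(2) merging above (illustrative, assuming cursor_mlock == 11
    // and all three pages present): freeing pages backed by file pages [3, 4, 7] merges 3 and 4
    // into one range, so only two munlock(2) calls are issued, for file pages 3..5 and 7..8, and
    // the method returns 3.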
    /// munlock(2)s pages if there are mlock(2)ed pages in the mmap and resets the internal cursor
    /// for mlock(2) tracking.
    pub fn clear_mlock(&mut self) -> Result<()> {
        if self.cursor_mlock > 0 {
            // cursor_mlock is non-zero only when disabling vmm-swap was aborted by a new request
            // to enable vmm-swap. munlock(2)ing all the possible pages is not a problem because
            // this is not a hot path.
            self.file_mmap
                .unlock(0, pages_to_bytes(self.cursor_mlock))
                .map_err(|e| Error::Mmap("munlock", e))?;
        }
        self.cursor_mlock = 0;
        Ok(())
    }

    /// Marks the page as present in the file.
    ///
    /// The content written to the swap file by a previous `SwapFile::write_to_file()` is reused.
    ///
    /// # Arguments
    ///
    /// * `idx_page` - the index of the page from the head of the pages.
    pub fn mark_as_present(&mut self, idx_page: usize) -> Result<()> {
        let state = self
            .page_states
            .get_mut(idx_page)
            .ok_or(Error::OutOfRange)?;
        if !state.is_none() && !state.is_present() {
            state.mark_as_present();
            let Some(idx_file) = state.idx_file() else {
                unreachable!("the page is not none.");
            };
            self.min_possible_present_idx_file =
                std::cmp::min(idx_file, self.min_possible_present_idx_file);
            Ok(())
        } else {
            Err(Error::InvalidIndex)
        }
    }

    /// Writes the contents to the swap file.
    ///
    /// # Arguments
    ///
    /// * `idx_page` - the index of the head page of the content from the head of the pages.
    /// * `mem_slice` - the page content(s). This can be more than 1 page. The size must be a
    ///   multiple of the page size.
    pub fn write_to_file(&mut self, idx_page: usize, mem_slice: &[u8]) -> Result<()> {
        // Validate the arguments.
        if !is_page_aligned(mem_slice.len()) {
            // The mem_slice size must be a multiple of the page size.
            return Err(Error::InvalidSize);
        }
        let num_pages = bytes_to_pages(mem_slice.len());
        if idx_page + num_pages > self.page_states.len() {
            return Err(Error::OutOfRange);
        }

        // Setting 0 is faster than computing the exact index with complex conditions.
        self.min_possible_present_idx_file = 0;

        for cur in idx_page..idx_page + num_pages {
            let state = &mut self.page_states[cur];
            if state.is_none() {
                let idx_file = self.file_states.allocate(cur);
                state.update(idx_file);
            } else {
                state.mark_as_present();
            }
        }

        let mut pending_idx_file = None;
        let mut pending_pages = 0;
        let mut mem_slice = mem_slice;
        for state in self.page_states[idx_page..idx_page + num_pages].iter() {
            let Some(idx_file) = state.idx_file() else {
                unreachable!("pages must be allocated");
            };
            if let Some(pending_idx_file) = pending_idx_file {
                if idx_file == pending_idx_file + pending_pages {
                    pending_pages += 1;
                    continue;
                }
                let size = pages_to_bytes(pending_pages);
                // Write with the pwrite(2) syscall instead of copying contents into the mmap
                // because the write syscall tells the kernel explicitly how many pages are going
                // to be written, while the mmap learns of each page only on a page fault.
                self.file
                    .write_all_at(&mem_slice[..size], pages_to_bytes(pending_idx_file) as u64)?;
                mem_slice = &mem_slice[size..];
            }
            pending_idx_file = Some(idx_file);
            pending_pages = 1;
        }
        if let Some(pending_idx_file) = pending_idx_file {
            let size = pages_to_bytes(pending_pages);
            self.file
                .write_all_at(&mem_slice[..size], pages_to_bytes(pending_idx_file) as u64)?;
            mem_slice = &mem_slice[size..];
        }
        if !mem_slice.is_empty() {
            unreachable!("mem_slice must be all consumed");
        }

        Ok(())
    }
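    // Batching example for the pwrite(2) loop above (illustrative): writing 3 pages whose
    // allocated file pages are [5, 6, 9] issues two pwrite(2) calls, one covering file pages
    // 5..7 and one covering file page 9, because only consecutive file pages are merged into a
    // single write.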
    /// Returns the first range of indices of consecutive pages present in the swap file.
    ///
    /// # Arguments
    ///
    /// * `max_pages` - the maximum length of the returned range, even if the run of consecutive
    ///   present pages is longer than this.
    pub fn first_data_range(&mut self, max_pages: usize) -> Option<Range<usize>> {
        if let Some((idx_file_range, head_idx_page)) = self.file_states.find_present_pages_range(
            self.min_possible_present_idx_file,
            &self.page_states,
            max_pages,
            true,
        ) {
            self.min_possible_present_idx_file = idx_file_range.start;
            let idx_page_range =
                head_idx_page..head_idx_page + idx_file_range.end - idx_file_range.start;
            Some(idx_page_range)
        } else {
            self.min_possible_present_idx_file = self.file_states.len();
            None
        }
    }

    /// Returns the [VolatileSlice] corresponding to the indices regardless of whether the pages
    /// are present or not.
    ///
    /// If the range is out of the region, this returns [Error::OutOfRange].
    ///
    /// # Arguments
    ///
    /// * `idx_page_range` - the indices of the pages. All the pages must be present and
    ///   consecutive in the compacted file.
    pub fn get_slice(&self, idx_page_range: Range<usize>) -> Result<VolatileSlice> {
        let idx_file_range = self.convert_idx_page_range_to_idx_file(idx_page_range)?;
        match self.file_mmap.get_slice(
            pages_to_bytes(idx_file_range.start),
            pages_to_bytes(idx_file_range.end - idx_file_range.start),
        ) {
            Ok(slice) => Ok(slice),
            Err(VolatileMemoryError::OutOfBounds { .. }) => Err(Error::OutOfRange),
            Err(e) => Err(e.into()),
        }
    }

    /// Returns the count of present pages in the swap file.
    pub fn present_pages(&self) -> usize {
        self.page_states
            .iter()
            .map(|state| state.is_present() as usize)
            .sum()
    }

    /// Converts the index range to the corresponding index range in the compacted file.
    ///
    /// This validates that `idx_page_range` satisfies:
    ///
    /// * Every index in `idx_page_range` has a corresponding page in the file.
    /// * The corresponding index range in the file is consecutive.
    fn convert_idx_page_range_to_idx_file(
        &self,
        idx_page_range: Range<usize>,
    ) -> Result<Range<usize>> {
        // Validate that the idx_page_range is for consecutive present file pages.
        let state = self
            .page_states
            .get(idx_page_range.start)
            .ok_or(Error::OutOfRange)?;
        if state.is_none() || !state.is_present() {
            return Err(Error::InvalidIndex);
        }
        let Some(head_idx_file) = state.idx_file() else {
            unreachable!("the page is not none.");
        };
        let mut idx_file = head_idx_file;
        for idx in idx_page_range.start + 1..idx_page_range.end {
            let state = self.page_states.get(idx).ok_or(Error::OutOfRange)?;
            idx_file += 1;
            if state.is_none()
                || !state.is_present()
                || state
                    .idx_file()
                    .unwrap_or_else(|| unreachable!("the page is not none."))
                    != idx_file
            {
                return Err(Error::InvalidIndex);
            }
        }
        let idx_file_range =
            head_idx_file..head_idx_file + idx_page_range.end - idx_page_range.start;
        Ok(idx_file_range)
    }
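    // Worked example of the validation above (illustrative): if pages 3..5 are present and
    // backed by file pages 7 and 8, the range converts to 7..9. If page 4 were backed by file
    // page 9 instead, the file range would not be consecutive and this returns
    // [Error::InvalidIndex].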
}

#[cfg(test)]
mod tests {
    use std::slice;

    use base::pagesize;
    use base::sys::FileDataIterator;

    use super::*;

    #[test]
    fn new_success() {
        let file = tempfile::tempfile().unwrap();

        assert!(SwapFile::new(&file, 200).is_ok());
    }

    #[test]
    fn len() {
        let file = tempfile::tempfile().unwrap();
        let swap_file = SwapFile::new(&file, 200).unwrap();

        assert_eq!(swap_file.page_states.len(), 200);
    }

    #[test]
    fn page_content_default_is_none() {
        let file = tempfile::tempfile().unwrap();
        let swap_file = SwapFile::new(&file, 200).unwrap();

        assert!(swap_file.page_content(0, false).unwrap().is_none());
    }

    #[test]
    fn page_content_returns_content() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        let data = &vec![1; pagesize()];
        swap_file.write_to_file(0, data).unwrap();

        let page = swap_file.page_content(0, false).unwrap().unwrap();
        // TODO(b/315998194): Add safety comment
        #[allow(clippy::undocumented_unsafe_blocks)]
        let result = unsafe { slice::from_raw_parts(page.as_ptr(), pagesize()) };
        assert_eq!(result, data);
    }

    #[test]
    fn page_content_out_of_range() {
        let file = tempfile::tempfile().unwrap();
        let swap_file = SwapFile::new(&file, 200).unwrap();

        assert!(swap_file.page_content(199, false).is_ok());
        match swap_file.page_content(200, false) {
            Err(Error::OutOfRange) => {}
            _ => unreachable!("not out of range"),
        }
    }

    fn assert_page_content(swap_file: &SwapFile, idx: usize, data: &[u8]) {
        let page = swap_file.page_content(idx, false).unwrap().unwrap();
        // TODO(b/315998194): Add safety comment
        #[allow(clippy::undocumented_unsafe_blocks)]
        let result = unsafe { slice::from_raw_parts(page.as_ptr(), pagesize()) };
        assert_eq!(result, data);
    }

    #[test]
    fn write_to_file_swap_file() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        let buf1 = &vec![1; pagesize()];
        let buf2 = &vec![2; 2 * pagesize()];
        swap_file.write_to_file(0, buf1).unwrap();
        swap_file.write_to_file(2, buf2).unwrap();

        // page_content()
        assert_page_content(&swap_file, 0, buf1);
        assert_page_content(&swap_file, 2, &buf2[0..pagesize()]);
        assert_page_content(&swap_file, 3, &buf2[pagesize()..2 * pagesize()]);
    }

    #[test]
    fn write_to_file_invalid_size() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        let buf = &vec![1; pagesize() + 1];
        match swap_file.write_to_file(0, buf) {
            Err(Error::InvalidSize) => {}
            _ => unreachable!("not invalid size"),
        };
    }

    #[test]
    fn write_to_file_out_of_range() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        let buf1 = &vec![1; pagesize()];
        let buf2 = &vec![2; 2 * pagesize()];
        match swap_file.write_to_file(200, buf1) {
            Err(Error::OutOfRange) => {}
            _ => unreachable!("not out of range"),
        };
        match swap_file.write_to_file(199, buf2) {
            Err(Error::OutOfRange) => {}
            _ => unreachable!("not out of range"),
        };
    }
    #[test]
    fn write_to_file_overwrite() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        swap_file.write_to_file(0, &vec![1; pagesize()]).unwrap();
        swap_file
            .write_to_file(2, &vec![2; 2 * pagesize()])
            .unwrap();

        let mut buf = vec![0; 3 * pagesize()];
        buf[..pagesize()].fill(3);
        buf[pagesize()..2 * pagesize()].fill(4);
        buf[2 * pagesize()..3 * pagesize()].fill(5);
        swap_file.write_to_file(0, &buf).unwrap();

        assert_page_content(&swap_file, 0, &vec![3; pagesize()]);
        assert_page_content(&swap_file, 1, &vec![4; pagesize()]);
        assert_page_content(&swap_file, 2, &vec![5; pagesize()]);
        assert_page_content(&swap_file, 3, &vec![2; pagesize()]);
        assert!(swap_file.page_content(4, false).unwrap().is_none());

        let data = FileDataIterator::new(&file, 0, file.metadata().unwrap().len())
            .collect::<std::result::Result<Vec<_>, _>>();
        assert_eq!(data, Ok(vec![0..4 * pagesize() as u64]));

        buf[..pagesize()].fill(6);
        buf[pagesize()..2 * pagesize()].fill(7);
        buf[2 * pagesize()..3 * pagesize()].fill(8);
        swap_file.write_to_file(2, &buf).unwrap();
        assert_page_content(&swap_file, 0, &vec![3; pagesize()]);
        assert_page_content(&swap_file, 1, &vec![4; pagesize()]);
        assert_page_content(&swap_file, 2, &vec![6; pagesize()]);
        assert_page_content(&swap_file, 3, &vec![7; pagesize()]);
        assert_page_content(&swap_file, 4, &vec![8; pagesize()]);
        assert!(swap_file.page_content(5, false).unwrap().is_none());

        let data = FileDataIterator::new(&file, 0, file.metadata().unwrap().len())
            .collect::<std::result::Result<Vec<_>, _>>();
        assert_eq!(data, Ok(vec![0..5 * pagesize() as u64]));
    }

    #[test]
    #[cfg(target_arch = "x86_64")] // TODO(b/272612118): unit test infra (qemu-user) support
    fn lock_and_start_populate() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        swap_file.write_to_file(1, &vec![1; pagesize()]).unwrap();
        swap_file
            .write_to_file(3, &vec![1; 5 * pagesize()])
            .unwrap();
        swap_file.write_to_file(10, &vec![1; pagesize()]).unwrap();

        let mut locked_pages = 0;
        loop {
            let pages = swap_file.lock_and_async_prefetch(2).unwrap();
            if pages == 0 {
                break;
            }
            assert!(pages <= 2);
            locked_pages += pages;
        }
        assert_eq!(locked_pages, 7);
    }

    #[test]
    fn clear_range() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        let data = &vec![1; pagesize()];
        swap_file.write_to_file(0, data).unwrap();
        swap_file.clear_range(0..1).unwrap();

        assert!(swap_file.page_content(0, false).unwrap().is_none());
    }

    #[test]
    #[cfg(target_arch = "x86_64")] // TODO(b/272612118): unit test infra (qemu-user) support
    fn clear_range_unlocked_pages() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        swap_file
            .write_to_file(1, &vec![1; 10 * pagesize()])
            .unwrap();
        // 1..6 is locked, 6..11 is not locked.
        assert_eq!(swap_file.lock_and_async_prefetch(5).unwrap(), 5);

        // locked pages only
        assert_eq!(swap_file.clear_range(1..4).unwrap(), 3);
        // locked pages + non-locked pages
        assert_eq!(swap_file.clear_range(4..7).unwrap(), 2);
        // non-locked pages
        assert_eq!(swap_file.clear_range(10..11).unwrap(), 0);
    }

    #[test]
    fn clear_range_keep_on_disk() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        let data = &vec![1; pagesize()];
        swap_file.write_to_file(0, data).unwrap();
        swap_file.clear_range(0..1).unwrap();

        let slice = swap_file.page_content(0, true).unwrap().unwrap();
        // TODO(b/315998194): Add safety comment
        #[allow(clippy::undocumented_unsafe_blocks)]
        let slice = unsafe { slice::from_raw_parts(slice.as_ptr(), slice.size()) };
        assert_eq!(slice, data);
    }

    #[test]
    fn clear_range_out_of_range() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();
        swap_file.write_to_file(199, &vec![0; pagesize()]).unwrap();

        match swap_file.clear_range(199..201) {
            Err(Error::OutOfRange) => {}
            _ => unreachable!("not out of range"),
        };
        assert!(swap_file.clear_range(199..200).is_ok());
        match swap_file.clear_range(200..201) {
            Err(Error::OutOfRange) => {}
            _ => unreachable!("not out of range"),
        };
    }

    #[test]
    fn free_range() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        let data = &vec![1; pagesize()];
        swap_file.write_to_file(0, data).unwrap();
        swap_file.free_range(0..1).unwrap();

        assert!(swap_file.page_content(0, false).unwrap().is_none());
        assert!(swap_file.page_content(0, true).unwrap().is_none());
    }

    #[test]
    #[cfg(target_arch = "x86_64")] // TODO(b/272612118): unit test infra (qemu-user) support
    fn free_range_unlocked_pages() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        swap_file
            .write_to_file(1, &vec![1; 10 * pagesize()])
            .unwrap();
        // 1..6 is locked, 6..11 is not locked.
        assert_eq!(swap_file.lock_and_async_prefetch(5).unwrap(), 5);

        // empty pages
        assert_eq!(swap_file.free_range(0..1).unwrap(), 0);
        // empty pages + locked pages
        assert_eq!(swap_file.free_range(0..2).unwrap(), 1);
        // locked pages only
        assert_eq!(swap_file.free_range(2..4).unwrap(), 2);
        // empty pages + locked pages + non-locked pages
        assert_eq!(swap_file.free_range(3..7).unwrap(), 2);
        // non-locked pages
        assert_eq!(swap_file.free_range(10..11).unwrap(), 0);
    }

    #[test]
    fn free_range_out_of_range() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        assert!(swap_file.free_range(199..200).is_ok());
        match swap_file.free_range(200..201) {
            Err(Error::OutOfRange) => {}
            _ => unreachable!("not out of range"),
        };
        match swap_file.free_range(199..201) {
            Err(Error::OutOfRange) => {}
            _ => unreachable!("not out of range"),
        };
    }

    #[test]
    fn free_range_and_write() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        let data = &vec![1; 5 * pagesize()];
        swap_file.write_to_file(0, data).unwrap();
        swap_file.free_range(0..5).unwrap();

        swap_file
            .write_to_file(0, &vec![2; 2 * pagesize()])
            .unwrap();
        swap_file
            .write_to_file(5, &vec![3; 4 * pagesize()])
            .unwrap();

        assert_page_content(&swap_file, 0, &vec![2; pagesize()]);
        assert_page_content(&swap_file, 1, &vec![2; pagesize()]);
        assert!(swap_file.page_content(2, true).unwrap().is_none());
        assert!(swap_file.page_content(3, true).unwrap().is_none());
        assert!(swap_file.page_content(4, true).unwrap().is_none());
        assert_page_content(&swap_file, 5, &vec![3; pagesize()]);
        assert_page_content(&swap_file, 6, &vec![3; pagesize()]);
        assert_page_content(&swap_file, 7, &vec![3; pagesize()]);
        assert_page_content(&swap_file, 8, &vec![3; pagesize()]);
        assert!(swap_file.page_content(9, true).unwrap().is_none());

        let data = FileDataIterator::new(&file, 0, file.metadata().unwrap().len())
            .collect::<std::result::Result<Vec<_>, _>>();
        assert_eq!(data, Ok(vec![0..6 * pagesize() as u64]));
    }

    #[test]
    #[cfg(target_arch = "x86_64")] // TODO(b/272612118): unit test infra (qemu-user) support
    fn clear_mlock() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        swap_file
            .write_to_file(1, &vec![1; 10 * pagesize()])
            .unwrap();
        // success if there is no mlock.
        assert!(swap_file.clear_mlock().is_ok());

        assert_eq!(swap_file.lock_and_async_prefetch(11).unwrap(), 10);
        // success if there is mlocked area.
        assert!(swap_file.clear_mlock().is_ok());

        // mlock area is cleared.
        assert_eq!(swap_file.lock_and_async_prefetch(11).unwrap(), 10);
    }

    #[test]
    fn first_data_range() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        swap_file
            .write_to_file(1, &vec![1; 2 * pagesize()])
            .unwrap();
        swap_file.write_to_file(3, &vec![2; pagesize()]).unwrap();

        assert_eq!(swap_file.first_data_range(200).unwrap(), 1..4);
        assert_eq!(swap_file.first_data_range(2).unwrap(), 1..3);
        assert_eq!(swap_file.first_data_range(1).unwrap(), 1..2);
        swap_file.clear_range(1..3).unwrap();
        assert_eq!(swap_file.first_data_range(2).unwrap(), 3..4);
        swap_file.clear_range(3..4).unwrap();
        assert!(swap_file.first_data_range(2).is_none());
    }

    #[test]
    fn get_slice() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        swap_file.write_to_file(1, &vec![1; pagesize()]).unwrap();
        swap_file.write_to_file(2, &vec![2; pagesize()]).unwrap();

        let slice = swap_file.get_slice(1..3).unwrap();
        assert_eq!(slice.size(), 2 * pagesize());
        let mut buf = vec![0u8; pagesize()];
        slice.get_slice(0, pagesize()).unwrap().copy_to(&mut buf);
        assert_eq!(buf, vec![1; pagesize()]);

        let mut buf = vec![0u8; pagesize()];
        slice
            .get_slice(pagesize(), pagesize())
            .unwrap()
            .copy_to(&mut buf);
        assert_eq!(buf, vec![2; pagesize()]);
    }

    #[test]
    fn get_slice_out_of_range() {
        let file = tempfile::tempfile().unwrap();
        let swap_file = SwapFile::new(&file, 200).unwrap();

        match swap_file.get_slice(200..201) {
            Err(Error::OutOfRange) => {}
            other => {
                unreachable!("unexpected result {:?}", other);
            }
        }
    }

    #[test]
    fn present_pages() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        swap_file.write_to_file(1, &vec![1; pagesize()]).unwrap();
        swap_file.write_to_file(2, &vec![2; pagesize()]).unwrap();

        assert_eq!(swap_file.present_pages(), 2);
    }
}