// Copyright 2022 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#![deny(missing_docs)]

use std::fs::File;
use std::ops::Range;
use std::os::unix::fs::FileExt;

use base::error;
use base::linux::MemoryMappingUnix;
use base::MemoryMapping;
use base::MemoryMappingBuilder;
use base::MmapError;
use base::Protection;
use base::VolatileMemory;
use base::VolatileMemoryError;
use base::VolatileSlice;
use thiserror::Error as ThisError;

use crate::pagesize::bytes_to_pages;
use crate::pagesize::is_page_aligned;
use crate::pagesize::pages_to_bytes;

/// Result type for swap file operations.
pub type Result<T> = std::result::Result<T, Error>;
// On a system with 4 KiB pages, this limits guest memory to less than 8 TiB (2^31 pages * 4 KiB),
// which is a reasonable assumption. One index value is reserved because the free list stores
// `file page index + 1`.
const MAX_PAGE_IDX: usize = (1 << 31) - 2;

/// Errors for swap file operations.
#[derive(ThisError, Debug)]
pub enum Error {
    #[error("io failed: {0}")]
    Io(#[from] std::io::Error),
    #[error("mmap operation ({0}) failed: {1}")]
    Mmap(&'static str, MmapError),
    #[error("volatile memory operation failed: {0}")]
    VolatileMemory(#[from] VolatileMemoryError),
    #[error("index is out of range")]
    OutOfRange,
    #[error("data size is invalid")]
    InvalidSize,
    #[error("index is invalid")]
    InvalidIndex,
}

/// u32 packing the state of a page in the file.
///
/// * MSB: Whether the file page is freed (1: freed, 0: allocated).
/// * Lower 31 bits:
///   * The corresponding page index if the file page is allocated.
///   * The file page index + 1 of the next freed file page if this file page is freed. Zero means
///     it is the last page in the free list.
#[derive(Debug)]
struct FilePageState(u32);

impl FilePageState {
    const FREED_BIT_MASK: u32 = 1 << 31;

    fn freed_state(first_freed_page: Option<usize>) -> Self {
        Self(
            Self::FREED_BIT_MASK
                | first_freed_page
                    .map(|idx_file| idx_file as u32 + 1)
                    .unwrap_or(0),
        )
    }

    fn allocated_state(idx_page: usize) -> Option<Self> {
        if idx_page <= MAX_PAGE_IDX {
            Some(Self(idx_page as u32))
        } else {
            // idx_page is invalid.
            None
        }
    }

    fn is_freed(&self) -> bool {
        self.0 & Self::FREED_BIT_MASK != 0
    }

    /// This is valid only if the page is freed.
    fn next_file_freed_idx(&self) -> Option<Option<usize>> {
        if self.is_freed() {
            let next_idx_file = !Self::FREED_BIT_MASK & self.0;
            if next_idx_file == 0 {
                Some(None)
            } else {
                Some(Some(next_idx_file as usize - 1))
            }
        } else {
            None
        }
    }

    /// This is valid only if the page is allocated.
    fn idx_page(&self) -> Option<usize> {
        if self.is_freed() {
            // The file page is freed.
            None
        } else {
            Some(self.0 as usize)
        }
    }
}

#[derive(Debug)]
struct FilePageStates {
    /// Freed pages in the swap file are managed in a free list. `first_idx_file_freed` points to
    /// the first page index in the list.
    first_idx_file_freed: Option<usize>,
    states: Vec<FilePageState>,
}

impl FilePageStates {
    fn new(capacity: usize) -> Self {
        FilePageStates {
            first_idx_file_freed: None,
            states: Vec::with_capacity(capacity),
        }
    }

    fn len(&self) -> usize {
        self.states.len()
    }

    /// Frees a page in the swap file.
    fn free(&mut self, idx_file: usize) {
        self.states[idx_file] = FilePageState::freed_state(self.first_idx_file_freed);
        self.first_idx_file_freed = Some(idx_file);
    }

    /// Allocates a file page in the swap file.
    ///
    /// This returns the index of the allocated file page.
    ///
    /// This reuses freed file pages first. If the free list is empty, this allocates new pages in
    /// the file.
    fn allocate(&mut self, idx_page: usize) -> usize {
        if let Some(idx_file_freed) = self.first_idx_file_freed {
            // TODO(kawasin): Collect consecutive freed pages in the free list to reduce number of
            // writes.
            let Some(next_idx_file_freed) = self.states[idx_file_freed].next_file_freed_idx()
            else {
                unreachable!("pages in free list must be freed pages")
            };
            let Some(state) = FilePageState::allocated_state(idx_page) else {
                unreachable!("idx_page must be less than MAX_PAGE_IDX");
            };
            self.states[idx_file_freed] = state;
            self.first_idx_file_freed = next_idx_file_freed;

            idx_file_freed
        } else {
            // The free list is empty. Allocate new pages.
            let head_idx_file = self.states.len();
            let Some(state) = FilePageState::allocated_state(idx_page) else {
                unreachable!("idx_page must be less than MAX_PAGE_IDX");
            };
            self.states.push(state);
            head_idx_file
        }
    }

    /// Finds the index range of file pages that are all present.
    ///
    /// This returns a pair of the range of file page indexes and the page index corresponding to
    /// the first file page in the range.
    ///
    /// Returns `None` if no pages at or after `idx_file` are present.
    ///
    /// # Arguments
    ///
    /// * `idx_file` - The first index to start searching from.
    /// * `page_states` - The page states.
    /// * `max_pages` - The maximum number of pages to search.
    /// * `consecutive` - If true, the pages must have consecutive `idx_page` values.
    fn find_present_pages_range(
        &self,
        idx_file: usize,
        page_states: &[PageState],
        max_pages: usize,
        consecutive: bool,
    ) -> Option<(Range<usize>, usize)> {
        let next_head_idx_offset = self.states[idx_file..].iter().position(|state| {
            !state.is_freed()
                && page_states[state
                    .idx_page()
                    .unwrap_or_else(|| unreachable!("the page is not freed"))]
                .is_present()
        })?;
        let idx_file = idx_file + next_head_idx_offset;

        let Some(head_idx_page) = self.states[idx_file].idx_page() else {
            unreachable!("the file page must not be freed");
        };

        let mut pages = 1;

        if max_pages > 1 {
            for state in self.states[idx_file + 1..].iter() {
                if state.is_freed() {
                    break;
                } else {
                    let Some(idx_page) = state.idx_page() else {
                        unreachable!("allocated page must have idx_page");
                    };
                    if !page_states[idx_page].is_present()
                        || (consecutive && idx_page != head_idx_page + pages)
                    {
                        break;
                    }
                }

                pages += 1;
                if pages >= max_pages {
                    break;
                }
            }
        }

        Some((idx_file..idx_file + pages, head_idx_page))
    }
}

/// u32 packing the state of a guest memory page.
///
/// * If the page is not in the swap file, the value is zero.
/// * MSB: Whether the page is present or stale (0: stale, 1: present).
/// * Lower 31 bits: The corresponding file page index + 1. This is never zero.
#[derive(Clone, Debug)]
struct PageState(u32);

impl PageState {
    const IDX_FILE_MASK: u32 = (1 << 31) - 1;
    const PRESENT_BIT_MASK: u32 = 1 << 31;

    fn is_none(&self) -> bool {
        self.0 == 0
    }

    fn idx_file(&self) -> Option<usize> {
        if self.0 != 0 {
            Some((self.0 & Self::IDX_FILE_MASK) as usize - 1)
        } else {
            None
        }
    }

    fn is_present(&self) -> bool {
        self.0 & Self::PRESENT_BIT_MASK != 0
    }

    fn update(&mut self, idx_file: usize) {
        self.0 = (idx_file as u32 + 1) | Self::PRESENT_BIT_MASK;
    }

    fn mark_as_present(&mut self) {
        self.0 |= Self::PRESENT_BIT_MASK;
    }

    fn clear(&mut self) {
        self.0 &= !Self::PRESENT_BIT_MASK;
    }

    fn free(&mut self) {
        self.0 = 0;
    }
}

/// [SwapFile] stores active pages in a memory region.
///
/// This shares the swap file with other regions and creates an mmap of the corresponding range in
/// the file.
///
/// TODO(kawasin): The file structure is straightforward and is not optimized yet.
/// Each page in the file corresponds to the page in the memory region.
#[derive(Debug)]
pub struct SwapFile<'a> {
    file: &'a File,
    file_mmap: MemoryMapping,
    page_states: Vec<PageState>,
    file_states: FilePageStates,
    // All the data pages before this index are mlock(2)ed.
    cursor_mlock: usize,
    min_possible_present_idx_file: usize,
}

impl<'a> SwapFile<'a> {
    /// Creates an initialized [SwapFile] for a memory region.
    ///
    /// All pages are initially marked as empty.
    ///
    /// # Arguments
    ///
    /// * `file` - The swap file.
    /// * `num_of_pages` - The number of pages in the region.
    pub fn new(file: &'a File, num_of_pages: usize) -> Result<Self> {
        if num_of_pages > MAX_PAGE_IDX {
            return Err(Error::InvalidSize);
        }
        let file_mmap = MemoryMappingBuilder::new(pages_to_bytes(num_of_pages))
            .from_file(file)
            .protection(Protection::read())
            .build()
            .map_err(|e| Error::Mmap("create", e))?;
        Ok(Self {
            file,
            file_mmap,
            page_states: vec![PageState(0); num_of_pages],
            file_states: FilePageStates::new(num_of_pages),
            cursor_mlock: 0,
            min_possible_present_idx_file: 0,
        })
    }

    /// Returns the content of the page corresponding to the index if it is present.
    ///
    /// Returns [Option::None] if the file has no content for the page.
    ///
    /// Returns [Error::OutOfRange] if the `idx_page` is out of range.
    ///
    /// # Arguments
    ///
    /// * `idx_page` - the index of the page from the head of the pages.
    /// * `allow_cleared` - whether to return the content of a cleared (stale) page as well.
    pub fn page_content(
        &self,
        idx_page: usize,
        allow_cleared: bool,
    ) -> Result<Option<VolatileSlice>> {
        let state = self.page_states.get(idx_page).ok_or(Error::OutOfRange)?;
        if !state.is_none() && (allow_cleared || state.is_present()) {
            let Some(idx_file) = state.idx_file() else {
                unreachable!("the page is not none");
            };
            return match self
                .file_mmap
                .get_slice(pages_to_bytes(idx_file), pages_to_bytes(1))
            {
                Ok(slice) => Ok(Some(slice)),
                Err(VolatileMemoryError::OutOfBounds { .. }) => Err(Error::OutOfRange),
                Err(e) => Err(e.into()),
            };
        }
        Ok(None)
    }

    /// Starts reading ahead the swap file into the page cache from the head.
    ///
    /// This also `mlock2(2)`s the pages so that they are not dropped again after being populated.
    /// This does not block the caller thread on I/O because:
    ///
    /// * `mlock2(2)` is executed with `MLOCK_ONFAULT`.
    /// * `MADV_WILLNEED` is the same as `readahead(2)`, which triggers readahead in the
    ///   background.
    ///   * However, Linux has a bug that `readahead(2)` (and also `MADV_WILLNEED`) may block while
    ///     reading the filesystem metadata.
    ///
    /// This returns the number of consecutive pages which are newly mlock(2)ed. Returning `0`
    /// means that there is no more data to be mlock(2)ed in this file.
    ///
    /// The caller must track the number of pages mlock(2)ed so as not to mlock(2) more pages than
    /// `RLIMIT_MEMLOCK` allows if it does not have `CAP_IPC_LOCK`.
    ///
    /// # Arguments
    ///
    /// * `max_pages` - The maximum number of pages to be mlock(2)ed at once.
    pub fn lock_and_async_prefetch(&mut self, max_pages: usize) -> Result<usize> {
        if let Some((idx_file_range, _)) = self.file_states.find_present_pages_range(
            self.cursor_mlock,
            &self.page_states,
            max_pages,
            false,
        ) {
            let pages = idx_file_range.end - idx_file_range.start;
            let mem_offset = pages_to_bytes(idx_file_range.start);
            let size_in_bytes = pages_to_bytes(pages);
            self.file_mmap
                .lock_on_fault(mem_offset, size_in_bytes)
                .map_err(|e| Error::Mmap("mlock", e))?;
            self.file_mmap
                .async_prefetch(mem_offset, size_in_bytes)
                .map_err(|e| Error::Mmap("madvise willneed", e))?;
            self.cursor_mlock = idx_file_range.end;
            Ok(pages)
        } else {
            self.cursor_mlock = self.file_states.len();
            Ok(0)
        }
    }

    /// Marks the pages in the file corresponding to the index range as cleared.
    ///
    /// The contents in the swap file are preserved and will be reused by
    /// `SwapFile::mark_as_present()` to reduce disk I/O.
    ///
    /// If the pages are mlock(2)ed, this unlocks them before `MADV_DONTNEED`. This returns the
    /// number of pages munlock(2)ed.
    ///
    /// # Arguments
    ///
    /// * `idx_page_range` - The indices of consecutive pages to be cleared. All the pages must be
    ///   present and consecutive in the compacted file.
    pub fn clear_range(&mut self, idx_page_range: Range<usize>) -> Result<usize> {
        let idx_file_range = self.convert_idx_page_range_to_idx_file(idx_page_range.clone())?;

        for state in &mut self.page_states[idx_page_range] {
            state.clear();
        }

        let offset = pages_to_bytes(idx_file_range.start);
        let munlocked_size = if idx_file_range.start < self.cursor_mlock {
            // idx_file_range is validated by convert_idx_page_range_to_idx_file() and
            // self.cursor_mlock is within the mmap.
            let pages = idx_file_range.end.min(self.cursor_mlock) - idx_file_range.start;
            // munlock(2) first because MADV_DONTNEED fails for mlock(2)ed pages.
            self.file_mmap
                .unlock(offset, pages_to_bytes(pages))
                .map_err(|e| Error::Mmap("munlock", e))?;
            pages
        } else {
            0
        };
        // offset and size are validated by convert_idx_page_range_to_idx_file().
        let size = pages_to_bytes(idx_file_range.end - idx_file_range.start);
        // The page cache is cleared without writing pages back to the file even if they are
        // dirty. The disk contents, which may not be the latest, are kept for the later trim
        // optimization.
        self.file_mmap
            .drop_page_cache(offset, size)
            .map_err(|e| Error::Mmap("madvise dontneed", e))?;
        Ok(munlocked_size)
    }

    /// Frees the pages corresponding to the given range in the file.
    ///
    /// If the pages are mlock(2)ed, this unlocks them. This returns the number of pages
    /// munlock(2)ed.
    ///
    /// # Arguments
    ///
    /// * `idx_page_range` - The indices of consecutive pages to be freed. This may contain
    ///   non-present pages.
    pub fn free_range(&mut self, idx_page_range: Range<usize>) -> Result<usize> {
        if idx_page_range.end > self.page_states.len() {
            return Err(Error::OutOfRange);
        }
        let mut mlocked_pages = 0;
        let mut mlock_range: Option<Range<usize>> = None;
        for state in &mut self.page_states[idx_page_range] {
            if !state.is_none() {
                let Some(idx_file) = state.idx_file() else {
                    unreachable!("the page is not none.");
                };
                self.file_states.free(idx_file);

                if idx_file < self.cursor_mlock && state.is_present() {
                    mlocked_pages += 1;
                    if let Some(range) = mlock_range.as_mut() {
                        if idx_file + 1 == range.start {
                            range.start = idx_file;
                        } else if idx_file == range.end {
                            range.end += 1;
                        } else {
                            self.file_mmap
                                .unlock(
                                    pages_to_bytes(range.start),
                                    pages_to_bytes(range.end - range.start),
                                )
                                .map_err(|e| Error::Mmap("munlock", e))?;
                            mlock_range = Some(idx_file..idx_file + 1);
                        }
                    } else {
                        mlock_range = Some(idx_file..idx_file + 1);
                    }
                }
            }
            state.free();
        }
        if let Some(mlock_range) = mlock_range {
            self.file_mmap
                .unlock(
                    pages_to_bytes(mlock_range.start),
                    pages_to_bytes(mlock_range.end - mlock_range.start),
                )
                .map_err(|e| Error::Mmap("munlock", e))?;
        }

        Ok(mlocked_pages)
    }

    /// munlock(2)s the mlock(2)ed pages in the mmap, if any, and resets the internal cursor for
    /// mlock(2) tracking.
    pub fn clear_mlock(&mut self) -> Result<()> {
        if self.cursor_mlock > 0 {
            // cursor_mlock is not `0` only when disabling vmm-swap is aborted by overriding
            // vmm-swap enable. munlock(2)ing all the possible pages is not a problem because this
            // is not a hot path.
            self.file_mmap
                .unlock(0, pages_to_bytes(self.cursor_mlock))
                .map_err(|e| Error::Mmap("munlock", e))?;
        }
        self.cursor_mlock = 0;
        Ok(())
    }

    /// Marks the page as present in the file.
    ///
    /// The content written to the swap file by a previous `SwapFile::write_to_file()` is reused.
    ///
    /// # Arguments
    ///
    /// * `idx_page` - the index of the page from the head of the pages.
    pub fn mark_as_present(&mut self, idx_page: usize) -> Result<()> {
        let state = self
            .page_states
            .get_mut(idx_page)
            .ok_or(Error::OutOfRange)?;
        if !state.is_none() && !state.is_present() {
            state.mark_as_present();
            let Some(idx_file) = state.idx_file() else {
                unreachable!("the page is not none.");
            };
            self.min_possible_present_idx_file =
                std::cmp::min(idx_file, self.min_possible_present_idx_file);
            Ok(())
        } else {
            Err(Error::InvalidIndex)
        }
    }

    /// Writes the contents to the swap file.
    ///
    /// # Arguments
    ///
    /// * `idx_page` - the index of the head page of the content from the head of the pages.
    /// * `mem_slice` - the page content(s). This can be more than 1 page. The size must be
    ///   page-aligned.
    pub fn write_to_file(&mut self, idx_page: usize, mem_slice: &[u8]) -> Result<()> {
        // validate
        if !is_page_aligned(mem_slice.len()) {
            // mem_slice size must be page-aligned.
            return Err(Error::InvalidSize);
        }
        let num_pages = bytes_to_pages(mem_slice.len());
        if idx_page + num_pages > self.page_states.len() {
            return Err(Error::OutOfRange);
        }

        // Setting 0 is faster than computing the exact index with complex conditions.
        self.min_possible_present_idx_file = 0;

        for cur in idx_page..idx_page + num_pages {
            let state = &mut self.page_states[cur];
            if state.is_none() {
                let idx_file = self.file_states.allocate(cur);
                state.update(idx_file);
            } else {
                state.mark_as_present();
            }
        }

        let mut pending_idx_file = None;
        let mut pending_pages = 0;
        let mut mem_slice = mem_slice;
        for state in self.page_states[idx_page..idx_page + num_pages].iter() {
            let Some(idx_file) = state.idx_file() else {
                unreachable!("pages must be allocated");
            };
            if let Some(pending_idx_file) = pending_idx_file {
                if idx_file == pending_idx_file + pending_pages {
                    pending_pages += 1;
                    continue;
                }
                let size = pages_to_bytes(pending_pages);
                // Write with the pwrite(2) syscall instead of copying contents to the mmap
                // because the write syscall makes it explicit to the kernel how many pages are
                // going to be written, while the mmap only learns about each page to be written
                // on a page fault basis.
                self.file
                    .write_all_at(&mem_slice[..size], pages_to_bytes(pending_idx_file) as u64)?;
                mem_slice = &mem_slice[size..];
            }
            pending_idx_file = Some(idx_file);
            pending_pages = 1;
        }
        if let Some(pending_idx_file) = pending_idx_file {
            let size = pages_to_bytes(pending_pages);
            self.file
                .write_all_at(&mem_slice[..size], pages_to_bytes(pending_idx_file) as u64)?;
            mem_slice = &mem_slice[size..];
        }
        if !mem_slice.is_empty() {
            unreachable!("mem_slice must be all consumed");
        }

        Ok(())
    }

    /// Returns the first range of indices of consecutive pages present in the swap file.
    ///
    /// # Arguments
    ///
    /// * `max_pages` - the max size of the returned chunk even if the chunk of consecutive
    ///   present pages is longer than this.
    pub fn first_data_range(&mut self, max_pages: usize) -> Option<Range<usize>> {
        if let Some((idx_file_range, head_idx_page)) = self.file_states.find_present_pages_range(
            self.min_possible_present_idx_file,
            &self.page_states,
            max_pages,
            true,
        ) {
            self.min_possible_present_idx_file = idx_file_range.start;
            let idx_page_range =
                head_idx_page..head_idx_page + idx_file_range.end - idx_file_range.start;
            Some(idx_page_range)
        } else {
            self.min_possible_present_idx_file = self.file_states.len();
            None
        }
    }

    /// Returns the [VolatileSlice] in the file mmap corresponding to the page indices.
    ///
    /// If the range is out of the region, this returns [Error::OutOfRange].
    ///
    /// # Arguments
    ///
    /// * `idx_page_range` - the indices of the pages. All the pages must be present and
    ///   consecutive in the compacted file.
    pub fn get_slice(&self, idx_page_range: Range<usize>) -> Result<VolatileSlice> {
        let idx_file_range = self.convert_idx_page_range_to_idx_file(idx_page_range)?;
        match self.file_mmap.get_slice(
            pages_to_bytes(idx_file_range.start),
            pages_to_bytes(idx_file_range.end - idx_file_range.start),
        ) {
            Ok(slice) => Ok(slice),
            Err(VolatileMemoryError::OutOfBounds { .. }) => Err(Error::OutOfRange),
            Err(e) => Err(e.into()),
        }
    }

    /// Returns the count of present pages in the swap file.
    pub fn present_pages(&self) -> usize {
        self.page_states
            .iter()
            .map(|state| state.is_present() as usize)
            .sum()
    }

    /// Converts the page index range to the corresponding index range in the compacted file.
    ///
    /// This validates that `idx_page_range` satisfies:
    ///
    /// * Each page in `idx_page_range` has a corresponding page in the file.
    /// * The corresponding index range in the file is consecutive.
    fn convert_idx_page_range_to_idx_file(
        &self,
        idx_page_range: Range<usize>,
    ) -> Result<Range<usize>> {
        // Validate that the idx_range is for consecutive present file pages.
        let state = self
            .page_states
            .get(idx_page_range.start)
            .ok_or(Error::OutOfRange)?;
        if state.is_none() || !state.is_present() {
            return Err(Error::InvalidIndex);
        }
        let Some(head_idx_file) = state.idx_file() else {
            unreachable!("the page is not none.");
        };
        let mut idx_file = head_idx_file;
        for idx in idx_page_range.start + 1..idx_page_range.end {
            let state = self.page_states.get(idx).ok_or(Error::OutOfRange)?;
            idx_file += 1;
            if state.is_none()
                || !state.is_present()
                || state
                    .idx_file()
                    .unwrap_or_else(|| unreachable!("the page is not none."))
                    != idx_file
            {
                return Err(Error::InvalidIndex);
            }
        }
        let idx_file_range =
            head_idx_file..head_idx_file + idx_page_range.end - idx_page_range.start;
        Ok(idx_file_range)
    }
}

#[cfg(test)]
mod tests {
    use std::slice;

    use base::pagesize;
    use base::sys::FileDataIterator;

    use super::*;

    #[test]
    fn new_success() {
        let file = tempfile::tempfile().unwrap();

        assert!(SwapFile::new(&file, 200).is_ok());
    }

    #[test]
    fn len() {
        let file = tempfile::tempfile().unwrap();
        let swap_file = SwapFile::new(&file, 200).unwrap();

        assert_eq!(swap_file.page_states.len(), 200);
    }

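    // Illustrative sketch, not part of the original suite: exercises the bit packing described
    // in the `FilePageState` doc comment (MSB = freed bit, lower 31 bits = page index, or
    // `next freed file page index + 1` for freed pages).
    #[test]
    fn file_page_state_bit_packing() {
        // A freed state stores `next freed index + 1` in the lower 31 bits.
        let freed = FilePageState::freed_state(Some(5));
        assert!(freed.is_freed());
        assert_eq!(freed.next_file_freed_idx(), Some(Some(5)));

        // Zero in the lower bits marks the end of the free list.
        let last_freed = FilePageState::freed_state(None);
        assert_eq!(last_freed.next_file_freed_idx(), Some(None));

        // An allocated state stores the page index directly with the freed bit unset.
        let allocated = FilePageState::allocated_state(42).unwrap();
        assert!(!allocated.is_freed());
        assert_eq!(allocated.idx_page(), Some(42));

        // Indices above MAX_PAGE_IDX do not fit in the encoding (the MSB is reserved and the
        // free list needs the +1 offset) and are rejected.
        assert!(FilePageState::allocated_state(MAX_PAGE_IDX + 1).is_none());
    }
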
    #[test]
    fn page_content_default_is_none() {
        let file = tempfile::tempfile().unwrap();
        let swap_file = SwapFile::new(&file, 200).unwrap();

        assert!(swap_file.page_content(0, false).unwrap().is_none());
    }

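    // Illustrative sketch, not part of the original suite: the free list in `FilePageStates`
    // reuses freed file pages in LIFO order before growing the file.
    #[test]
    fn file_page_states_reuse_freed_pages() {
        let mut file_states = FilePageStates::new(10);

        assert_eq!(file_states.allocate(0), 0);
        assert_eq!(file_states.allocate(1), 1);
        assert_eq!(file_states.len(), 2);

        // Freeing pushes the file page onto the free list.
        file_states.free(0);
        // The freed file page is reused instead of growing the file.
        assert_eq!(file_states.allocate(2), 0);
        assert_eq!(file_states.len(), 2);
        // With an empty free list, allocation appends a new file page.
        assert_eq!(file_states.allocate(3), 2);
        assert_eq!(file_states.len(), 3);
    }
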
    #[test]
    fn page_content_returns_content() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        let data = &vec![1; pagesize()];
        swap_file.write_to_file(0, data).unwrap();

        let page = swap_file.page_content(0, false).unwrap().unwrap();
        // TODO(b/315998194): Add safety comment
        #[allow(clippy::undocumented_unsafe_blocks)]
        let result = unsafe { slice::from_raw_parts(page.as_ptr(), pagesize()) };
        assert_eq!(result, data);
    }

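    // Illustrative sketch, not part of the original suite: exercises the bit packing described
    // in the `PageState` doc comment (zero = not in the file, MSB = present bit, lower 31 bits
    // = file page index + 1).
    #[test]
    fn page_state_bit_packing() {
        let mut state = PageState(0);
        assert!(state.is_none());
        assert!(!state.is_present());
        assert_eq!(state.idx_file(), None);

        // update() records the file page index and sets the present bit.
        state.update(3);
        assert!(!state.is_none());
        assert!(state.is_present());
        assert_eq!(state.idx_file(), Some(3));

        // clear() drops only the present bit; the file page index is preserved.
        state.clear();
        assert!(!state.is_none());
        assert!(!state.is_present());
        assert_eq!(state.idx_file(), Some(3));

        // mark_as_present() restores the present bit without a new write.
        state.mark_as_present();
        assert!(state.is_present());

        // free() resets the state to "not in the file".
        state.free();
        assert!(state.is_none());
    }
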
    #[test]
    fn page_content_out_of_range() {
        let file = tempfile::tempfile().unwrap();
        let swap_file = SwapFile::new(&file, 200).unwrap();

        assert!(swap_file.page_content(199, false).is_ok());
        match swap_file.page_content(200, false) {
            Err(Error::OutOfRange) => {}
            _ => unreachable!("not out of range"),
        }
    }

    fn assert_page_content(swap_file: &SwapFile, idx: usize, data: &[u8]) {
        let page = swap_file.page_content(idx, false).unwrap().unwrap();
        // TODO(b/315998194): Add safety comment
        #[allow(clippy::undocumented_unsafe_blocks)]
        let result = unsafe { slice::from_raw_parts(page.as_ptr(), pagesize()) };
        assert_eq!(result, data);
    }

    #[test]
    fn write_to_file_swap_file() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        let buf1 = &vec![1; pagesize()];
        let buf2 = &vec![2; 2 * pagesize()];
        swap_file.write_to_file(0, buf1).unwrap();
        swap_file.write_to_file(2, buf2).unwrap();

        // page_content()
        assert_page_content(&swap_file, 0, buf1);
        assert_page_content(&swap_file, 2, &buf2[0..pagesize()]);
        assert_page_content(&swap_file, 3, &buf2[pagesize()..2 * pagesize()]);
    }

    #[test]
    fn write_to_file_invalid_size() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        let buf = &vec![1; pagesize() + 1];
        match swap_file.write_to_file(0, buf) {
            Err(Error::InvalidSize) => {}
            _ => unreachable!("not invalid size"),
        };
    }

    #[test]
    fn write_to_file_out_of_range() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        let buf1 = &vec![1; pagesize()];
        let buf2 = &vec![2; 2 * pagesize()];
        match swap_file.write_to_file(200, buf1) {
            Err(Error::OutOfRange) => {}
            _ => unreachable!("not out of range"),
        };
        match swap_file.write_to_file(199, buf2) {
            Err(Error::OutOfRange) => {}
            _ => unreachable!("not out of range"),
        };
    }

    #[test]
    fn write_to_file_overwrite() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        swap_file.write_to_file(0, &vec![1; pagesize()]).unwrap();
        swap_file
            .write_to_file(2, &vec![2; 2 * pagesize()])
            .unwrap();

        let mut buf = vec![0; 3 * pagesize()];
        buf[..pagesize()].fill(3);
        buf[pagesize()..2 * pagesize()].fill(4);
        buf[2 * pagesize()..3 * pagesize()].fill(5);
        swap_file.write_to_file(0, &buf).unwrap();

        assert_page_content(&swap_file, 0, &vec![3; pagesize()]);
        assert_page_content(&swap_file, 1, &vec![4; pagesize()]);
        assert_page_content(&swap_file, 2, &vec![5; pagesize()]);
        assert_page_content(&swap_file, 3, &vec![2; pagesize()]);
        assert!(swap_file.page_content(4, false).unwrap().is_none());

        let data = FileDataIterator::new(&file, 0, file.metadata().unwrap().len())
            .collect::<std::result::Result<Vec<_>, _>>();
        assert_eq!(data, Ok(vec![0..4 * pagesize() as u64]));

        buf[..pagesize()].fill(6);
        buf[pagesize()..2 * pagesize()].fill(7);
        buf[2 * pagesize()..3 * pagesize()].fill(8);
        swap_file.write_to_file(2, &buf).unwrap();
        assert_page_content(&swap_file, 0, &vec![3; pagesize()]);
        assert_page_content(&swap_file, 1, &vec![4; pagesize()]);
        assert_page_content(&swap_file, 2, &vec![6; pagesize()]);
        assert_page_content(&swap_file, 3, &vec![7; pagesize()]);
        assert_page_content(&swap_file, 4, &vec![8; pagesize()]);
        assert!(swap_file.page_content(5, false).unwrap().is_none());

        let data = FileDataIterator::new(&file, 0, file.metadata().unwrap().len())
            .collect::<std::result::Result<Vec<_>, _>>();
        assert_eq!(data, Ok(vec![0..5 * pagesize() as u64]));
    }

    #[test]
    #[cfg(target_arch = "x86_64")] // TODO(b/272612118): unit test infra (qemu-user) support
    fn lock_and_start_populate() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        swap_file.write_to_file(1, &vec![1; pagesize()]).unwrap();
        swap_file
            .write_to_file(3, &vec![1; 5 * pagesize()])
            .unwrap();
        swap_file.write_to_file(10, &vec![1; pagesize()]).unwrap();

        let mut locked_pages = 0;
        loop {
            let pages = swap_file.lock_and_async_prefetch(2).unwrap();
            if pages == 0 {
                break;
            }
            assert!(pages <= 2);
            locked_pages += pages;
        }
        assert_eq!(locked_pages, 7);
    }

    #[test]
    fn clear_range() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        let data = &vec![1; pagesize()];
        swap_file.write_to_file(0, data).unwrap();
        swap_file.clear_range(0..1).unwrap();

        assert!(swap_file.page_content(0, false).unwrap().is_none());
    }

    #[test]
    #[cfg(target_arch = "x86_64")] // TODO(b/272612118): unit test infra (qemu-user) support
    fn clear_range_unlocked_pages() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        swap_file
            .write_to_file(1, &vec![1; 10 * pagesize()])
            .unwrap();
        // 1..6 is locked, 6..11 is not locked.
        assert_eq!(swap_file.lock_and_async_prefetch(5).unwrap(), 5);

        // locked pages only
        assert_eq!(swap_file.clear_range(1..4).unwrap(), 3);
        // locked pages + non-locked pages
        assert_eq!(swap_file.clear_range(4..7).unwrap(), 2);
        // non-locked pages
        assert_eq!(swap_file.clear_range(10..11).unwrap(), 0);
    }

    #[test]
    fn clear_range_keep_on_disk() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        let data = &vec![1; pagesize()];
        swap_file.write_to_file(0, data).unwrap();
        swap_file.clear_range(0..1).unwrap();

        let slice = swap_file.page_content(0, true).unwrap().unwrap();
        // TODO(b/315998194): Add safety comment
        #[allow(clippy::undocumented_unsafe_blocks)]
        let slice = unsafe { slice::from_raw_parts(slice.as_ptr(), slice.size()) };
        assert_eq!(slice, data);
    }

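    // Illustrative sketch, not part of the original suite: after clear_range(), the preserved
    // file content can be made visible again with mark_as_present() without rewriting it.
    #[test]
    fn mark_as_present_reuses_cleared_content() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        let data = &vec![1; pagesize()];
        swap_file.write_to_file(0, data).unwrap();
        swap_file.clear_range(0..1).unwrap();
        assert!(swap_file.page_content(0, false).unwrap().is_none());

        swap_file.mark_as_present(0).unwrap();
        assert_page_content(&swap_file, 0, data);

        // mark_as_present() fails for pages which are already present.
        assert!(swap_file.mark_as_present(0).is_err());
    }
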
    #[test]
    fn clear_range_out_of_range() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();
        swap_file.write_to_file(199, &vec![0; pagesize()]).unwrap();

        match swap_file.clear_range(199..201) {
            Err(Error::OutOfRange) => {}
            _ => unreachable!("not out of range"),
        };
        assert!(swap_file.clear_range(199..200).is_ok());
        match swap_file.clear_range(200..201) {
            Err(Error::OutOfRange) => {}
            _ => unreachable!("not out of range"),
        };
    }

    #[test]
    fn free_range() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        let data = &vec![1; pagesize()];
        swap_file.write_to_file(0, data).unwrap();
        swap_file.free_range(0..1).unwrap();

        assert!(swap_file.page_content(0, false).unwrap().is_none());
        assert!(swap_file.page_content(0, true).unwrap().is_none());
    }

    #[test]
    #[cfg(target_arch = "x86_64")] // TODO(b/272612118): unit test infra (qemu-user) support
    fn free_range_unlocked_pages() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        swap_file
            .write_to_file(1, &vec![1; 10 * pagesize()])
            .unwrap();
        // 1..6 is locked, 6..11 is not locked.
        assert_eq!(swap_file.lock_and_async_prefetch(5).unwrap(), 5);

        // empty pages
        assert_eq!(swap_file.free_range(0..1).unwrap(), 0);
        // empty pages + locked pages
        assert_eq!(swap_file.free_range(0..2).unwrap(), 1);
        // locked pages only
        assert_eq!(swap_file.free_range(2..4).unwrap(), 2);
        // empty pages + locked pages + non-locked pages
        assert_eq!(swap_file.free_range(3..7).unwrap(), 2);
        // non-locked pages
        assert_eq!(swap_file.free_range(10..11).unwrap(), 0);
    }

    #[test]
    fn free_range_out_of_range() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        assert!(swap_file.free_range(199..200).is_ok());
        match swap_file.free_range(200..201) {
            Err(Error::OutOfRange) => {}
            _ => unreachable!("not out of range"),
        };
        match swap_file.free_range(199..201) {
            Err(Error::OutOfRange) => {}
            _ => unreachable!("not out of range"),
        };
    }

    #[test]
    fn free_range_and_write() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        let data = &vec![1; 5 * pagesize()];
        swap_file.write_to_file(0, data).unwrap();
        swap_file.free_range(0..5).unwrap();

        swap_file
            .write_to_file(0, &vec![2; 2 * pagesize()])
            .unwrap();
        swap_file
            .write_to_file(5, &vec![3; 4 * pagesize()])
            .unwrap();

        assert_page_content(&swap_file, 0, &vec![2; pagesize()]);
        assert_page_content(&swap_file, 1, &vec![2; pagesize()]);
        assert!(swap_file.page_content(2, true).unwrap().is_none());
        assert!(swap_file.page_content(3, true).unwrap().is_none());
        assert!(swap_file.page_content(4, true).unwrap().is_none());
        assert_page_content(&swap_file, 5, &vec![3; pagesize()]);
        assert_page_content(&swap_file, 6, &vec![3; pagesize()]);
        assert_page_content(&swap_file, 7, &vec![3; pagesize()]);
        assert_page_content(&swap_file, 8, &vec![3; pagesize()]);
        assert!(swap_file.page_content(9, true).unwrap().is_none());

        let data = FileDataIterator::new(&file, 0, file.metadata().unwrap().len())
            .collect::<std::result::Result<Vec<_>, _>>();
        assert_eq!(data, Ok(vec![0..6 * pagesize() as u64]));
    }

    #[test]
    #[cfg(target_arch = "x86_64")] // TODO(b/272612118): unit test infra (qemu-user) support
    fn clear_mlock() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        swap_file
            .write_to_file(1, &vec![1; 10 * pagesize()])
            .unwrap();
        // success if there is no mlock.
        assert!(swap_file.clear_mlock().is_ok());

        assert_eq!(swap_file.lock_and_async_prefetch(11).unwrap(), 10);
        // success if there is an mlocked area.
        assert!(swap_file.clear_mlock().is_ok());

        // mlock area is cleared.
        assert_eq!(swap_file.lock_and_async_prefetch(11).unwrap(), 10);
    }

    #[test]
    fn first_data_range() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        swap_file
            .write_to_file(1, &vec![1; 2 * pagesize()])
            .unwrap();
        swap_file.write_to_file(3, &vec![2; pagesize()]).unwrap();

        assert_eq!(swap_file.first_data_range(200).unwrap(), 1..4);
        assert_eq!(swap_file.first_data_range(2).unwrap(), 1..3);
        assert_eq!(swap_file.first_data_range(1).unwrap(), 1..2);
        swap_file.clear_range(1..3).unwrap();
        assert_eq!(swap_file.first_data_range(2).unwrap(), 3..4);
        swap_file.clear_range(3..4).unwrap();
        assert!(swap_file.first_data_range(2).is_none());
    }

    #[test]
    fn get_slice() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        swap_file.write_to_file(1, &vec![1; pagesize()]).unwrap();
        swap_file.write_to_file(2, &vec![2; pagesize()]).unwrap();

        let slice = swap_file.get_slice(1..3).unwrap();
        assert_eq!(slice.size(), 2 * pagesize());
        let mut buf = vec![0u8; pagesize()];
        slice.get_slice(0, pagesize()).unwrap().copy_to(&mut buf);
        assert_eq!(buf, vec![1; pagesize()]);

        let mut buf = vec![0u8; pagesize()];
        slice
            .get_slice(pagesize(), pagesize())
            .unwrap()
            .copy_to(&mut buf);
        assert_eq!(buf, vec![2; pagesize()]);
    }

    #[test]
    fn get_slice_out_of_range() {
        let file = tempfile::tempfile().unwrap();
        let swap_file = SwapFile::new(&file, 200).unwrap();

        match swap_file.get_slice(200..201) {
            Err(Error::OutOfRange) => {}
            other => {
                unreachable!("unexpected result {:?}", other);
            }
        }
    }

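    // Illustrative sketch, not part of the original suite: get_slice() requires all pages in the
    // range to be present and consecutive in the compacted file, so a hole in the page range
    // yields InvalidIndex rather than a slice.
    #[test]
    fn get_slice_invalid_index() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        // Pages 1 and 3 are written with a gap at page 2.
        swap_file.write_to_file(1, &vec![1; pagesize()]).unwrap();
        swap_file.write_to_file(3, &vec![2; pagesize()]).unwrap();

        match swap_file.get_slice(1..4) {
            Err(Error::InvalidIndex) => {}
            other => {
                unreachable!("unexpected result {:?}", other);
            }
        }
    }
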
    #[test]
    fn present_pages() {
        let file = tempfile::tempfile().unwrap();
        let mut swap_file = SwapFile::new(&file, 200).unwrap();

        swap_file.write_to_file(1, &vec![1; pagesize()]).unwrap();
        swap_file.write_to_file(2, &vec![2; pagesize()]).unwrap();

        assert_eq!(swap_file.present_pages(), 2);
    }
}