// Copyright (C) 2024 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::cmp::{max, min};
use std::collections::{BTreeMap, HashMap, HashSet};
use std::fmt;
use std::fmt::Display;
use std::fs::{File, Metadata, OpenOptions};
use std::hash::Hash;
use std::io::Write;
use std::ops::{Deref, DerefMut};
use std::os::unix::fs::MetadataExt;
use std::time::SystemTime;

use crc32fast::Hasher;
use log::debug;
use regex::Regex;
use serde::Deserializer;
use serde::Serialize;
use serde::{Deserialize, Serializer};

use crate::error::Error;

static MAGIC_UUID: [u8; 16] = [
    0x10, 0x54, 0x3c, 0xb8, 0x60, 0xdb, 0x49, 0x45, 0xa1, 0xd5, 0xde, 0xa7, 0xd2, 0x3b, 0x05, 0x49,
];
static MAJOR_VERSION: u16 = 0;
static MINOR_VERSION: u16 = 1;

/// Represents an inode number, which is unique within a filesystem.
pub(crate) type InodeNumber = u64;

/// Represents a device number, which is unique for a given block device.
pub(crate) type DeviceNumber = u64;

/// Convenience name for a string that represents a path.
pub(crate) type PathString = String;

/// Represents a unique file id across filesystems.
#[derive(Clone, Debug, Deserialize, Eq, Hash, Default, PartialEq, PartialOrd, Ord, Serialize)]
pub struct FileId(pub u64);

impl Display for FileId {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.0.fmt(f)
    }
}

// Serializes a HashMap by copying it into a BTreeMap so that entries are written in sorted
// key order, keeping the serialized output deterministic.
fn serialize_hashmap<S, K: Ord + Serialize + Clone, V: Serialize + Clone>(
    value: &HashMap<K, V>,
    serializer: S,
) -> Result<S::Ok, S::Error>
where
    S: Serializer,
{
    let mut btree = BTreeMap::new();
    for (k, v) in value {
        btree.insert(k.clone(), v.clone());
    }
    btree.serialize(serializer)
}

/// HashMap wrapper whose entries are serialized in sorted key order (see `serialize_hashmap`).
#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)]
pub(crate) struct SerializableHashMap<
    K: Ord + Serialize + Clone + Hash + PartialEq,
    V: Serialize + Clone,
> {
    #[serde(serialize_with = "serialize_hashmap")]
    pub map: HashMap<K, V>,
}

impl<K, V> Deref for SerializableHashMap<K, V>
where
    K: Ord + Serialize + Clone + Hash + PartialEq,
    V: Serialize + Clone,
{
    type Target = HashMap<K, V>;
    fn deref(&self) -> &Self::Target {
        &self.map
    }
}

impl<K, V> DerefMut for SerializableHashMap<K, V>
where
    K: Ord + Serialize + Clone + Hash + PartialEq,
    V: Serialize + Clone,
{
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.map
    }
}

/// The InodeInfo is unique per (device, inode) combination. It is
/// used to verify that we are prefetching the file for which the records
/// were generated.
/// `Record` refers to this information with a unique `FileId`.
#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)]
pub struct InodeInfo {
    // Inode number of the file.
    pub(crate) inode_number: InodeNumber,

    // File size in bytes.
    pub(crate) file_size: u64,

    // Paths of the file. The field is used to get from a `Record` to the file
    // that needs to be prefetched.
    //
    // This struct is built by getting data from trace lines and querying the
    // filesystem for other fields about the file/inode.
    //
    // One instance per file to be prefetched. A file/inode can have multiple paths.
    // We store multiple paths so that we can still get to the file if some of the
    // paths get deleted.
    //
    // See comments for `Record`.
    #[serde(deserialize_with = "check_inode_info_paths")]
    pub(crate) paths: Vec<PathString>,

    // Block device number on which the file is located.
    pub(crate) device_number: DeviceNumber,
}

impl InodeInfo {
    /// Returns a new InodeInfo.
    pub fn new(
        inode_number: InodeNumber,
        file_size: u64,
        paths: Vec<String>,
        device_number: DeviceNumber,
    ) -> Self {
        Self { inode_number, file_size, paths, device_number }
    }
}

// Helps us check block alignment.
//
// A records file can have multiple FsInfos.
#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)]
pub struct FsInfo {
    // This is the filesystem block size, not the underlying device's block size.
    pub(crate) block_size: u64,
}

/// Prefetch record.
/// Each record translates to one filesystem `read()` request.
///
/// The tracer builds a `Record` by parsing trace lines or by querying the filesystem.
///
/// Multiple `Record`s can belong to a single InodeInfo. For example, if there were two
/// reads for the file `/data/my.apk`, which is assigned FileId 10, at offsets 0 and 8k with a
/// length of 1 byte each, then we will have two `Record`s in the `RecordsFile` that look like
/// `Record {file_id: 10, offset: 0, length: 1, timestamp: t1}`
/// `Record {file_id: 10, offset: 8192, length: 1, timestamp: t2}`
#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)]
pub struct Record {
    /// Points to the file that should be fetched. file_id is unique per `InodeInfo`
    /// in a `RecordsFile`.
    pub file_id: FileId,

    /// Start offset to fetch data from. This is FsInfo.block_size aligned.
    pub offset: u64,

    /// Length of the read. This is generally rounded up to FsInfo.block_size,
    /// except when the rounding up would cross `InodeInfo.file_size`.
    pub length: u64,

    /// Timestamp in nanoseconds since the start when the data was loaded.
    pub timestamp: u64,
}

impl Record {
    /// Returns a merged record if the two records belong to the same file and their ranges
    /// overlap or touch; the earlier timestamp is kept.
    fn overlaps(&self, other: &Self) -> Option<Self> {
        if self.file_id == other.file_id {
            let self_start = self.offset;
            let self_end = self.offset + self.length;
            let other_start = other.offset;
            let other_end = other.offset + other.length;

            if (self_start <= other_end) && (self_end >= other_start) {
                let offset = min(self_start, other_start);
                let length = max(self_end, other_end) - offset;
                return Some(Self {
                    file_id: self.file_id.clone(),
                    offset,
                    length,
                    timestamp: min(self.timestamp, other.timestamp),
                });
            }
        }
        None
    }
}

// Groups records by file id. Files appear in the order in which they are first seen in the
// input; within a file, records are sorted by offset and a duplicate offset keeps the first
// record seen.
fn group_record_by_file_id(records: Vec<Record>) -> Vec<Record> {
    let mut map: HashMap<FileId, BTreeMap<u64, Record>> = HashMap::new();

    for record in &records {
        let recs = map.entry(record.file_id.clone()).or_default();
        recs.entry(record.offset).or_insert_with(|| record.clone());
    }

    let mut grouped = vec![];
    for record in &records {
        if let Some(inode) = map.get(&record.file_id) {
            for rec in inode.values() {
                grouped.push(rec.clone());
            }
        }
        let _ = map.remove(&record.file_id);
    }

    grouped
}

/// When records are coalesced, because their file ids match and IO offsets overlap, the least
/// timestamp of the coalesced records is retained.
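///
/// A minimal sketch of the behavior (marked `ignore`; the records below are only
/// illustrative):
///
/// ```ignore
/// let a = Record { file_id: FileId(1), offset: 0, length: 4096, timestamp: 10 };
/// let b = Record { file_id: FileId(1), offset: 4096, length: 4096, timestamp: 5 };
/// // The ranges touch, so the two records coalesce into one; the least timestamp wins.
/// assert_eq!(
///     coalesce_records(vec![a, b], false),
///     vec![Record { file_id: FileId(1), offset: 0, length: 8192, timestamp: 5 }]
/// );
/// ```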
pub(crate) fn coalesce_records(records: Vec<Record>, group_by_file_id: bool) -> Vec<Record> {
    let records = if group_by_file_id { group_record_by_file_id(records) } else { records };

    let mut coalesced = vec![];
    let mut current: Option<Record> = None;
    for r in records {
        current = match current {
            None => Some(r),
            Some(c) => {
                let merged = c.overlaps(&r);
                match merged {
                    None => {
                        coalesced.push(c);
                        Some(r)
                    }
                    Some(m) => Some(m),
                }
            }
        }
    }
    if let Some(r) = current {
        coalesced.push(r);
    }
    coalesced
}

// Records file header.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub struct Header {
    /// Magic number (a UUID) to identify the header/format.
    #[serde(deserialize_with = "check_magic")]
    magic: [u8; 16],

    // Major version number.
    #[serde(deserialize_with = "check_major_number")]
    major_number: u16,

    // Minor version number.
    #[serde(deserialize_with = "check_minor_number")]
    minor_number: u16,

    /// Timestamp when the records file was generated.
    date: SystemTime,

    /// Checksum of the `RecordsFile`, computed with this `digest` field set to zero.
    digest: u32,
}

fn check_version_number<'de, D>(
    deserializer: D,
    expected: u16,
    version_type: &str,
) -> Result<u16, D::Error>
where
    D: Deserializer<'de>,
{
    let found = u16::deserialize(deserializer)?;
    if expected != found {
        return Err(serde::de::Error::custom(format!(
            "Failed to parse {} version. Expected: {} Found: {}",
            version_type, expected, found
        )));
    }
    Ok(found)
}

fn check_major_number<'de, D>(deserializer: D) -> Result<u16, D::Error>
where
    D: Deserializer<'de>,
{
    check_version_number(deserializer, MAJOR_VERSION, "major")
}

fn check_minor_number<'de, D>(deserializer: D) -> Result<u16, D::Error>
where
    D: Deserializer<'de>,
{
    check_version_number(deserializer, MINOR_VERSION, "minor")
}

fn check_magic<'de, D>(deserializer: D) -> Result<[u8; 16], D::Error>
where
    D: Deserializer<'de>,
{
    let found: [u8; 16] = <[u8; 16]>::deserialize(deserializer)?;
    if found != MAGIC_UUID {
        return Err(serde::de::Error::custom(format!(
            "Failed to parse magic number. Expected: {:?} Found: {:?}",
            MAGIC_UUID, found
        )));
    }
    Ok(found)
}

fn check_inode_info_paths<'de, D>(deserializer: D) -> Result<Vec<PathString>, D::Error>
where
    D: Deserializer<'de>,
{
    let parsed: Vec<PathString> = Vec::deserialize(deserializer)?;
    if parsed.is_empty() {
        return Err(serde::de::Error::custom("No paths found in InodeInfo"));
    }
    Ok(parsed)
}

// Helper inner struct of RecordsFile meant to verify the checksum.
#[derive(Clone, Debug, Default, Deserialize, Serialize, Eq, PartialEq)]
pub(crate) struct RecordsFileInner {
    // One instance per mounted block device.
    pub(crate) filesystems: SerializableHashMap<DeviceNumber, FsInfo>,

    /// Helps to get to a file path from a given `FileId`.
    /// One instance per file to be prefetched.
    pub(crate) inode_map: SerializableHashMap<FileId, InodeInfo>,

    /// Helps to get to a file and offset to be replayed.
    ///
    // The records are arranged chronologically, meaning the data that is
    // needed first is at the beginning of the vector and the data that is
    // needed last is at the end.
    //
    // One instance per part of the file that needs to be prefetched.
    pub records: Vec<Record>,
}

/// Deserialized form of the records file.
#[derive(Clone, Debug, Default, Deserialize, Serialize, Eq, PartialEq)]
#[serde(remote = "Self")]
pub struct RecordsFile {
    /// Helps the prefetch tool to parse the rest of the file.
    pub header: Header,

    /// Helps the prefetch tool to verify the checksum.
    pub(crate) inner: RecordsFileInner,
}

impl RecordsFile {
    /// Given a file id, looks up the path of the file and returns an open File handle.
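    ///
    /// A usage sketch (marked `ignore`; the exclude pattern below is only illustrative):
    ///
    /// ```ignore
    /// // Skip replaying anything that matches an exclude regex.
    /// let exclude = vec![Regex::new(r"^/data/tmp/").unwrap()];
    /// let file = records_file.open_file(FileId(10), &exclude)?;
    /// ```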
    pub fn open_file(&self, id: FileId, exclude_files_regex: &[Regex]) -> Result<File, Error> {
        if let Some(inode) = self.inner.inode_map.get(&id) {
            let path = inode.paths.first().unwrap();

            for regex in exclude_files_regex {
                if regex.is_match(path) {
                    return Err(Error::SkipPrefetch { path: path.to_owned() });
                }
            }
            debug!("Opening {} file {}", id.0, path);
            OpenOptions::new()
                .read(true)
                .write(false)
                .open(path)
                .map_err(|source| Error::Open { source, path: path.to_owned() })
        } else {
            Err(Error::IdNoFound { id })
        }
    }

    /// Inserts the given record into the RecordsFile.
    pub fn insert_record(&mut self, record: Record) {
        self.inner.records.push(record);
    }

    /// Inserts the given InodeInfo into the RecordsFile, or, if an entry already exists for
    /// the FileId, appends the info's first path to that entry.
    pub fn insert_or_update_inode_info(&mut self, id: FileId, info: InodeInfo) {
        if let Some(inode) = self.inner.inode_map.get_mut(&id) {
            if let Some(first_path) = info.paths.first() {
                inode.paths.push(first_path.clone());
            }
        } else {
            self.inner.inode_map.insert(id, info);
        }
    }

    /// Verifies the integrity of the records file.
    ///
    /// The check saves us from serializing an improperly built records file or replaying an
    /// inconsistent `RecordsFile`.
    ///
    /// Note: the check only works on the `RecordsFile` and doesn't access the filesystem. We
    /// limit the scope so that we avoid issuing filesystem operations (directory lookup, stats)
    /// twice - once during check and once during replaying.
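    ///
    /// A sketch of one failure mode (marked `ignore`; the same conditions are exercised by the
    /// tests at the bottom of this file):
    ///
    /// ```ignore
    /// let mut rf = RecordsFile::default();
    /// // A record whose FileId has no matching InodeInfo makes the check fail.
    /// rf.insert_record(Record { file_id: FileId(7), offset: 0, length: 4096, timestamp: 0 });
    /// assert!(rf.check().is_err());
    /// ```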
    pub fn check(&self) -> Result<(), Error> {
        let mut unique_files = HashSet::new();
        let mut missing_file_ids = vec![];

        for record in &self.inner.records {
            if !self.inner.inode_map.contains_key(&record.file_id) {
                missing_file_ids.push(record.file_id.clone());
            }
            unique_files.insert(record.file_id.clone());
        }

        let mut stale_inodes = vec![];
        let mut missing_paths = vec![];
        for (file_id, inode_info) in &self.inner.inode_map.map {
            if inode_info.paths.is_empty() {
                missing_paths.push(inode_info.clone());
            }
            if !unique_files.contains(file_id) {
                stale_inodes.push(inode_info.clone());
            }
        }

        if !stale_inodes.is_empty() || !missing_paths.is_empty() || !missing_file_ids.is_empty() {
            return Err(Error::StaleInode { stale_inodes, missing_paths, missing_file_ids });
        }

        Ok(())
    }

    /// Builds an InodeInfo from the args and inserts it into the RecordsFile.
    pub fn insert_or_update_inode(&mut self, id: FileId, stat: &Metadata, path: PathString) {
        self.insert_or_update_inode_info(
            id,
            InodeInfo {
                inode_number: stat.ino(),
                file_size: stat.len(),
                paths: vec![path],
                device_number: stat.dev(),
            },
        )
    }

    /// Serializes the records as CSV.
    pub fn serialize_records_to_csv(&self, writer: &mut dyn Write) -> Result<(), Error> {
        let mut wtr = csv::Writer::from_writer(writer);

        #[derive(Serialize)]
        struct TempRecord<'a> {
            timestamp: u64,
            file: &'a PathString,
            offset: u64,
            length: u64,
            file_size: u64,
        }

        for record in &self.inner.records {
            if let Some(inode_info) = self.inner.inode_map.get(&record.file_id) {
                let mut inode_info = inode_info.clone();
                inode_info.paths.sort();

                if let Some(first_path) = inode_info.paths.first().cloned() {
                    // Report the lexicographically smallest path for the file.
                    let record = TempRecord {
                        timestamp: record.timestamp,
                        file: &first_path,
                        offset: record.offset,
                        length: record.length,
                        file_size: inode_info.file_size,
                    };
                    wtr.serialize(&record)
                        .map_err(|e| Error::Serialize { error: e.to_string() })?;
                }
            }
        }
        wtr.flush()?;
        Ok(())
    }

    // Computes the crc32 digest of the CBOR-serialized file with the `digest` field reset to
    // its default (zero). Note that this leaves `self.header.digest` zeroed for the caller to
    // fill in.
    fn compute_digest(&mut self) -> Result<u32, Error> {
        self.header.digest = Default::default();
        let serialized = serde_cbor::to_vec(self)
            .map_err(|source| Error::Serialize { error: source.to_string() })?;

        let mut hasher = Hasher::new();
        hasher.update(&serialized);

        Ok(hasher.finalize())
    }

    /// Convenience wrapper around serialize that adds a checksum/digest to the file
    /// to verify file consistency during replay/deserialize.
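    ///
    /// A minimal round-trip sketch (marked `ignore`; it relies on the same serde_cbor calls as
    /// the tests below):
    ///
    /// ```ignore
    /// let mut rf = RecordsFile::default();
    /// let bytes = rf.add_checksum_and_serialize()?;
    /// // Deserializing recomputes the digest and runs the consistency check.
    /// let replayed: RecordsFile = serde_cbor::from_slice(&bytes)?;
    /// ```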
    pub fn add_checksum_and_serialize(&mut self) -> Result<Vec<u8>, Error> {
        self.header.digest = self.compute_digest()?;

        serde_cbor::to_vec(self).map_err(|source| Error::Serialize { error: source.to_string() })
    }
}

impl Default for Header {
    fn default() -> Self {
        Self {
            major_number: MAJOR_VERSION,
            minor_number: MINOR_VERSION,
            date: SystemTime::now(),
            digest: 0,
            magic: MAGIC_UUID,
        }
    }
}

// Wrapper around deserialize to check any inconsistencies in the file format.
impl<'de> Deserialize<'de> for RecordsFile {
    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let rf = Self::deserialize(deserializer)?;

        rf.check().map_err(|e| {
            serde::de::Error::custom(format!("failed to validate records file: {}", e))
        })?;

        let mut zero_digest = rf.clone();
        zero_digest.header.digest = 0;
        let digest =
            zero_digest.compute_digest().map_err(|e| serde::de::Error::custom(format!("{}", e)))?;

        if digest != rf.header.digest {
            return Err(serde::de::Error::custom(format!(
                "file consistency check failed. Expected: {}. Found: {}",
                digest, rf.header.digest
            )));
        }

        Ok(rf)
    }
}

// Wrapper around serialize to check any inconsistencies in the file format before serializing.
impl Serialize for RecordsFile {
    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        self.check().map(|_| self).map_err(|e| {
            serde::ser::Error::custom(format!("failed to validate records file: {}", e))
        })?;
        Self::serialize(self, serializer)
    }
}

#[cfg(test)]
pub mod tests {

    use std::assert_eq;

    use super::*;

    #[test]
    fn test_major_version_mismatch() {
        let mut rf = RecordsFile::default();

        rf.header.major_number += 1;

        let serialized: Result<RecordsFile, serde_cbor::Error> =
            serde_cbor::from_slice(&serde_cbor::to_vec(&rf).unwrap());

        assert_eq!(
            serialized.unwrap_err().to_string(),
            format!(
                "Failed to parse major version. Expected: {} Found: {}",
                MAJOR_VERSION,
                MAJOR_VERSION + 1
            )
        );
    }

    #[test]
    fn test_minor_version_mismatch() {
        let mut rf = RecordsFile::default();

        rf.header.minor_number += 1;

        let serialized: Result<RecordsFile, serde_cbor::Error> =
            serde_cbor::from_slice(&serde_cbor::to_vec(&rf).unwrap());

        assert_eq!(
            serialized.unwrap_err().to_string(),
            format!(
                "Failed to parse minor version. Expected: {} Found: {}",
                MINOR_VERSION,
                MINOR_VERSION + 1
            )
        );
    }

    #[test]
    fn deserialize_inode_info_without_path() {
        let inode = InodeInfo { inode_number: 1, file_size: 10, paths: vec![], device_number: 1 };
        let serialized = serde_cbor::to_vec(&inode).unwrap();
        let deserialized: Result<InodeInfo, serde_cbor::Error> =
            serde_cbor::from_slice(&serialized);
        assert_eq!(
            deserialized.unwrap_err().to_string(),
            "No paths found in InodeInfo".to_owned()
        );
    }
    #[test]
    fn test_serialize_records_to_csv() {
        let mut rf = RecordsFile::default();
        let file_count = 4;
        for i in 0..file_count {
            rf.insert_or_update_inode_info(
                FileId(i),
                InodeInfo {
                    inode_number: i,
                    file_size: i * 10,
                    paths: vec![format!("/hello/{}", i)],
                    device_number: i + 10,
                },
            )
        }
        for i in 0..10 {
            rf.insert_record(Record {
                file_id: FileId(i % file_count),
                offset: i * 3,
                length: i + 4,
                timestamp: i * file_count,
            });
        }

        let mut buf = vec![];
        rf.serialize_records_to_csv(&mut buf).unwrap();

        let data = String::from_utf8(buf).unwrap();
        assert_eq!(
            data,
            "timestamp,file,offset,length,file_size\n\
            0,/hello/0,0,4,0\n\
            4,/hello/1,3,5,10\n\
            8,/hello/2,6,6,20\n\
            12,/hello/3,9,7,30\n\
            16,/hello/0,12,8,0\n\
            20,/hello/1,15,9,10\n\
            24,/hello/2,18,10,20\n\
            28,/hello/3,21,11,30\n\
            32,/hello/0,24,12,0\n\
            36,/hello/1,27,13,10\n"
        );
    }
    fn new_record(file: u64, offset: u64, length: u64, timestamp: u64) -> Record {
        Record { file_id: FileId(file), offset, length, timestamp }
    }

    #[test]
    fn test_coalesced_without_group() {
        let non_coalescable_same_inode =
            vec![new_record(1, 2, 3, 4), new_record(1, 6, 3, 5), new_record(1, 10, 3, 6)];
        assert_eq!(
            coalesce_records(non_coalescable_same_inode.clone(), false),
            non_coalescable_same_inode
        );

        let non_coalescable_different_inode =
            vec![new_record(1, 2, 3, 4), new_record(2, 5, 3, 5), new_record(3, 8, 3, 6)];
        assert_eq!(
            coalesce_records(non_coalescable_different_inode.clone(), false),
            non_coalescable_different_inode
        );

        let some_coalesced =
            vec![new_record(1, 2, 3, 4), new_record(1, 5, 3, 5), new_record(3, 8, 3, 6)];
        assert_eq!(
            coalesce_records(some_coalesced, false),
            vec![new_record(1, 2, 6, 4), new_record(3, 8, 3, 6),]
        );

        let coalesced_into_one =
            vec![new_record(1, 2, 3, 4), new_record(1, 5, 3, 5), new_record(1, 8, 3, 6)];
        assert_eq!(coalesce_records(coalesced_into_one, false), vec![new_record(1, 2, 9, 4)]);

        let no_grouping_or_coalescing =
            vec![new_record(1, 2, 3, 4), new_record(3, 8, 3, 5), new_record(1, 5, 3, 6)];
        assert_eq!(
            coalesce_records(no_grouping_or_coalescing, false),
            vec![new_record(1, 2, 3, 4), new_record(3, 8, 3, 5), new_record(1, 5, 3, 6),]
        );
    }

    #[test]
    fn test_coalesced_with_grouping() {
        let non_coalescable_same_inode =
            vec![new_record(1, 2, 3, 4), new_record(1, 6, 3, 5), new_record(1, 10, 3, 6)];
        assert_eq!(
            coalesce_records(non_coalescable_same_inode.clone(), true),
            non_coalescable_same_inode
        );

        let non_coalescable_different_inode =
            vec![new_record(1, 2, 3, 4), new_record(2, 5, 3, 5), new_record(3, 8, 3, 6)];
        assert_eq!(
            coalesce_records(non_coalescable_different_inode.clone(), true),
            non_coalescable_different_inode
        );

        let some_coalesced =
            vec![new_record(1, 2, 3, 4), new_record(1, 5, 3, 5), new_record(3, 8, 3, 6)];
        assert_eq!(
            coalesce_records(some_coalesced, true),
            vec![new_record(1, 2, 6, 4), new_record(3, 8, 3, 6),]
        );

        let coalesced_into_one =
            vec![new_record(1, 2, 3, 4), new_record(1, 5, 3, 5), new_record(1, 8, 3, 6)];
        assert_eq!(coalesce_records(coalesced_into_one, true), vec![new_record(1, 2, 9, 4)]);

        let some_grouped_coalesced =
            vec![new_record(1, 2, 3, 4), new_record(3, 8, 3, 5), new_record(1, 5, 3, 6)];
        assert_eq!(
            coalesce_records(some_grouped_coalesced, true),
            vec![new_record(1, 2, 6, 4), new_record(3, 8, 3, 5),]
        );
    }

    #[test]
    fn check_missing_records() {
        let mut rf = RecordsFile::default();
        rf.inner.inode_map.insert(
            FileId(0),
            InodeInfo {
                inode_number: 0,
                file_size: 1,
                paths: vec!["hello".to_owned()],
                device_number: 2,
            },
        );
        rf.insert_record(Record { file_id: FileId(0), offset: 10, length: 20, timestamp: 30 });

        rf.inner.inode_map.insert(
            FileId(1),
            InodeInfo {
                inode_number: 1,
                file_size: 2,
                paths: vec!["world".to_owned()],
                device_number: 3,
            },
        );
        let e = rf.check().unwrap_err();
        assert_eq!(
            e.to_string(),
            "Stale inode(s) info found.\n\
                missing_file_ids: []\n\
                stale_inodes: [\n    \
                    InodeInfo {\n        \
                        inode_number: 1,\n        \
                        file_size: 2,\n        \
                        paths: [\n            \"world\",\n        ],\n        \
                        device_number: 3,\n    },\n] \n\
                missing_paths:[]"
        );
    }

    #[test]
    fn check_missing_file() {
        let mut rf = RecordsFile::default();
        rf.inner.inode_map.insert(
            FileId(0),
            InodeInfo {
                inode_number: 0,
                file_size: 1,
                paths: vec!["hello".to_owned()],
                device_number: 2,
            },
        );
        rf.insert_record(Record { file_id: FileId(0), offset: 10, length: 20, timestamp: 30 });
        rf.insert_record(Record { file_id: FileId(1), offset: 10, length: 20, timestamp: 30 });

        let e = rf.check().unwrap_err();
        assert_eq!(
            e.to_string(),
            "Stale inode(s) info found.\n\
                missing_file_ids: [\n    \
                    FileId(\n        1,\n    ),\n]\n\
                stale_inodes: [] \n\
                missing_paths:[]"
        );
    }

    #[test]
    fn check_missing_paths() {
        let mut rf = RecordsFile::default();
        rf.inner.inode_map.insert(
            FileId(0),
            InodeInfo { inode_number: 0, file_size: 1, paths: vec![], device_number: 2 },
        );
        rf.insert_record(Record { file_id: FileId(0), offset: 10, length: 20, timestamp: 30 });

        let e = rf.check().unwrap_err();
        assert_eq!(
            e.to_string(),
            "Stale inode(s) info found.\n\
                missing_file_ids: []\n\
                stale_inodes: [] \n\
                missing_paths:[\n    \
                    InodeInfo {\n        \
                        inode_number: 0,\n        \
                        file_size: 1,\n        \
                        paths: [],\n        \
                        device_number: 2,\n    },\n]"
        );
    }
}