1 /*! 2 Crate `walkdir` provides an efficient and cross platform implementation 3 of recursive directory traversal. Several options are exposed to control 4 iteration, such as whether to follow symbolic links (default off), limit the 5 maximum number of simultaneous open file descriptors and the ability to 6 efficiently skip descending into directories. 7 8 To use this crate, add `walkdir` as a dependency to your project's 9 `Cargo.toml`: 10 11 ```toml 12 [dependencies] 13 walkdir = "2" 14 ``` 15 16 # From the top 17 18 The [`WalkDir`] type builds iterators. The [`DirEntry`] type describes values 19 yielded by the iterator. Finally, the [`Error`] type is a small wrapper around 20 [`std::io::Error`] with additional information, such as if a loop was detected 21 while following symbolic links (not enabled by default). 22 23 [`WalkDir`]: struct.WalkDir.html 24 [`DirEntry`]: struct.DirEntry.html 25 [`Error`]: struct.Error.html 26 [`std::io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html 27 28 # Example 29 30 The following code recursively iterates over the directory given and prints 31 the path for each entry: 32 33 ```no_run 34 use walkdir::WalkDir; 35 # use walkdir::Error; 36 37 # fn try_main() -> Result<(), Error> { 38 for entry in WalkDir::new("foo") { 39 println!("{}", entry?.path().display()); 40 } 41 # Ok(()) 42 # } 43 ``` 44 45 Or, if you'd like to iterate over all entries and ignore any errors that 46 may arise, use [`filter_map`]. (e.g., This code below will silently skip 47 directories that the owner of the running process does not have permission to 48 access.) 49 50 ```no_run 51 use walkdir::WalkDir; 52 53 for entry in WalkDir::new("foo").into_iter().filter_map(|e| e.ok()) { 54 println!("{}", entry.path().display()); 55 } 56 ``` 57 58 [`filter_map`]: https://doc.rust-lang.org/stable/std/iter/trait.Iterator.html#method.filter_map 59 60 # Example: follow symbolic links 61 62 The same code as above, except [`follow_links`] is enabled: 63 64 ```no_run 65 use walkdir::WalkDir; 66 # use walkdir::Error; 67 68 # fn try_main() -> Result<(), Error> { 69 for entry in WalkDir::new("foo").follow_links(true) { 70 println!("{}", entry?.path().display()); 71 } 72 # Ok(()) 73 # } 74 ``` 75 76 [`follow_links`]: struct.WalkDir.html#method.follow_links 77 78 # Example: skip hidden files and directories on unix 79 80 This uses the [`filter_entry`] iterator adapter to avoid yielding hidden files 81 and directories efficiently (i.e. without recursing into hidden directories): 82 83 ```no_run 84 use walkdir::{DirEntry, WalkDir}; 85 # use walkdir::Error; 86 87 fn is_hidden(entry: &DirEntry) -> bool { 88 entry.file_name() 89 .to_str() 90 .map(|s| s.starts_with(".")) 91 .unwrap_or(false) 92 } 93 94 # fn try_main() -> Result<(), Error> { 95 let walker = WalkDir::new("foo").into_iter(); 96 for entry in walker.filter_entry(|e| !is_hidden(e)) { 97 println!("{}", entry?.path().display()); 98 } 99 # Ok(()) 100 # } 101 ``` 102 103 [`filter_entry`]: struct.IntoIter.html#method.filter_entry 104 */ 105 106 #![deny(missing_docs)] 107 #![allow(unknown_lints)] 108 109 #[cfg(doctest)] 110 doc_comment::doctest!("../README.md"); 111 112 use std::cmp::{min, Ordering}; 113 use std::fmt; 114 use std::fs::{self, ReadDir}; 115 use std::io; 116 use std::path::{Path, PathBuf}; 117 use std::result; 118 use std::vec; 119 120 use same_file::Handle; 121 122 pub use crate::dent::DirEntry; 123 #[cfg(unix)] 124 pub use crate::dent::DirEntryExt; 125 pub use crate::error::Error; 126 127 mod dent; 128 mod error; 129 #[cfg(test)] 130 mod tests; 131 mod util; 132 133 /// Like try, but for iterators that return [`Option<Result<_, _>>`]. 134 /// 135 /// [`Option<Result<_, _>>`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html 136 macro_rules! itry { 137 ($e:expr) => { 138 match $e { 139 Ok(v) => v, 140 Err(err) => return Some(Err(From::from(err))), 141 } 142 }; 143 } 144 145 /// A result type for walkdir operations. 146 /// 147 /// Note that this result type embeds the error type in this crate. This 148 /// is only useful if you care about the additional information provided by 149 /// the error (such as the path associated with the error or whether a loop 150 /// was dectected). If you want things to Just Work, then you can use 151 /// [`io::Result`] instead since the error type in this package will 152 /// automatically convert to an [`io::Result`] when using the [`try!`] macro. 153 /// 154 /// [`io::Result`]: https://doc.rust-lang.org/stable/std/io/type.Result.html 155 /// [`try!`]: https://doc.rust-lang.org/stable/std/macro.try.html 156 pub type Result<T> = ::std::result::Result<T, Error>; 157 158 /// A builder to create an iterator for recursively walking a directory. 159 /// 160 /// Results are returned in depth first fashion, with directories yielded 161 /// before their contents. If [`contents_first`] is true, contents are yielded 162 /// before their directories. The order is unspecified but if [`sort_by`] is 163 /// given, directory entries are sorted according to this function. Directory 164 /// entries `.` and `..` are always omitted. 165 /// 166 /// If an error occurs at any point during iteration, then it is returned in 167 /// place of its corresponding directory entry and iteration continues as 168 /// normal. If an error occurs while opening a directory for reading, then it 169 /// is not descended into (but the error is still yielded by the iterator). 170 /// Iteration may be stopped at any time. When the iterator is destroyed, all 171 /// resources associated with it are freed. 172 /// 173 /// [`contents_first`]: struct.WalkDir.html#method.contents_first 174 /// [`sort_by`]: struct.WalkDir.html#method.sort_by 175 /// 176 /// # Usage 177 /// 178 /// This type implements [`IntoIterator`] so that it may be used as the subject 179 /// of a `for` loop. You may need to call [`into_iter`] explicitly if you want 180 /// to use iterator adapters such as [`filter_entry`]. 181 /// 182 /// Idiomatic use of this type should use method chaining to set desired 183 /// options. For example, this only shows entries with a depth of `1`, `2` or 184 /// `3` (relative to `foo`): 185 /// 186 /// ```no_run 187 /// use walkdir::WalkDir; 188 /// # use walkdir::Error; 189 /// 190 /// # fn try_main() -> Result<(), Error> { 191 /// for entry in WalkDir::new("foo").min_depth(1).max_depth(3) { 192 /// println!("{}", entry?.path().display()); 193 /// } 194 /// # Ok(()) 195 /// # } 196 /// ``` 197 /// 198 /// [`IntoIterator`]: https://doc.rust-lang.org/stable/std/iter/trait.IntoIterator.html 199 /// [`into_iter`]: https://doc.rust-lang.org/nightly/core/iter/trait.IntoIterator.html#tymethod.into_iter 200 /// [`filter_entry`]: struct.IntoIter.html#method.filter_entry 201 /// 202 /// Note that the iterator by default includes the top-most directory. Since 203 /// this is the only directory yielded with depth `0`, it is easy to ignore it 204 /// with the [`min_depth`] setting: 205 /// 206 /// ```no_run 207 /// use walkdir::WalkDir; 208 /// # use walkdir::Error; 209 /// 210 /// # fn try_main() -> Result<(), Error> { 211 /// for entry in WalkDir::new("foo").min_depth(1) { 212 /// println!("{}", entry?.path().display()); 213 /// } 214 /// # Ok(()) 215 /// # } 216 /// ``` 217 /// 218 /// [`min_depth`]: struct.WalkDir.html#method.min_depth 219 /// 220 /// This will only return descendents of the `foo` directory and not `foo` 221 /// itself. 222 /// 223 /// # Loops 224 /// 225 /// This iterator (like most/all recursive directory iterators) assumes that 226 /// no loops can be made with *hard* links on your file system. In particular, 227 /// this would require creating a hard link to a directory such that it creates 228 /// a loop. On most platforms, this operation is illegal. 229 /// 230 /// Note that when following symbolic/soft links, loops are detected and an 231 /// error is reported. 232 #[derive(Debug)] 233 pub struct WalkDir { 234 opts: WalkDirOptions, 235 root: PathBuf, 236 } 237 238 struct WalkDirOptions { 239 follow_links: bool, 240 follow_root_links: bool, 241 max_open: usize, 242 min_depth: usize, 243 max_depth: usize, 244 sorter: Option< 245 Box< 246 dyn FnMut(&DirEntry, &DirEntry) -> Ordering 247 + Send 248 + Sync 249 + 'static, 250 >, 251 >, 252 contents_first: bool, 253 same_file_system: bool, 254 } 255 256 impl fmt::Debug for WalkDirOptions { fmt( &self, f: &mut fmt::Formatter<'_>, ) -> result::Result<(), fmt::Error>257 fn fmt( 258 &self, 259 f: &mut fmt::Formatter<'_>, 260 ) -> result::Result<(), fmt::Error> { 261 let sorter_str = if self.sorter.is_some() { 262 // FnMut isn't `Debug` 263 "Some(...)" 264 } else { 265 "None" 266 }; 267 f.debug_struct("WalkDirOptions") 268 .field("follow_links", &self.follow_links) 269 .field("follow_root_link", &self.follow_root_links) 270 .field("max_open", &self.max_open) 271 .field("min_depth", &self.min_depth) 272 .field("max_depth", &self.max_depth) 273 .field("sorter", &sorter_str) 274 .field("contents_first", &self.contents_first) 275 .field("same_file_system", &self.same_file_system) 276 .finish() 277 } 278 } 279 280 impl WalkDir { 281 /// Create a builder for a recursive directory iterator starting at the 282 /// file path `root`. If `root` is a directory, then it is the first item 283 /// yielded by the iterator. If `root` is a file, then it is the first 284 /// and only item yielded by the iterator. If `root` is a symlink, then it 285 /// is always followed for the purposes of directory traversal. (A root 286 /// `DirEntry` still obeys its documentation with respect to symlinks and 287 /// the `follow_links` setting.) new<P: AsRef<Path>>(root: P) -> Self288 pub fn new<P: AsRef<Path>>(root: P) -> Self { 289 WalkDir { 290 opts: WalkDirOptions { 291 follow_links: false, 292 follow_root_links: true, 293 max_open: 10, 294 min_depth: 0, 295 max_depth: ::std::usize::MAX, 296 sorter: None, 297 contents_first: false, 298 same_file_system: false, 299 }, 300 root: root.as_ref().to_path_buf(), 301 } 302 } 303 304 /// Set the minimum depth of entries yielded by the iterator. 305 /// 306 /// The smallest depth is `0` and always corresponds to the path given 307 /// to the `new` function on this type. Its direct descendents have depth 308 /// `1`, and their descendents have depth `2`, and so on. min_depth(mut self, depth: usize) -> Self309 pub fn min_depth(mut self, depth: usize) -> Self { 310 self.opts.min_depth = depth; 311 if self.opts.min_depth > self.opts.max_depth { 312 self.opts.min_depth = self.opts.max_depth; 313 } 314 self 315 } 316 317 /// Set the maximum depth of entries yield by the iterator. 318 /// 319 /// The smallest depth is `0` and always corresponds to the path given 320 /// to the `new` function on this type. Its direct descendents have depth 321 /// `1`, and their descendents have depth `2`, and so on. 322 /// 323 /// Note that this will not simply filter the entries of the iterator, but 324 /// it will actually avoid descending into directories when the depth is 325 /// exceeded. max_depth(mut self, depth: usize) -> Self326 pub fn max_depth(mut self, depth: usize) -> Self { 327 self.opts.max_depth = depth; 328 if self.opts.max_depth < self.opts.min_depth { 329 self.opts.max_depth = self.opts.min_depth; 330 } 331 self 332 } 333 334 /// Follow symbolic links. By default, this is disabled. 335 /// 336 /// When `yes` is `true`, symbolic links are followed as if they were 337 /// normal directories and files. If a symbolic link is broken or is 338 /// involved in a loop, an error is yielded. 339 /// 340 /// When enabled, the yielded [`DirEntry`] values represent the target of 341 /// the link while the path corresponds to the link. See the [`DirEntry`] 342 /// type for more details. 343 /// 344 /// [`DirEntry`]: struct.DirEntry.html follow_links(mut self, yes: bool) -> Self345 pub fn follow_links(mut self, yes: bool) -> Self { 346 self.opts.follow_links = yes; 347 self 348 } 349 350 /// Follow symbolic links if these are the root of the traversal. 351 /// By default, this is enabled. 352 /// 353 /// When `yes` is `true`, symbolic links on root paths are followed 354 /// which is effective if the symbolic link points to a directory. 355 /// If a symbolic link is broken or is involved in a loop, an error is yielded 356 /// as the first entry of the traversal. 357 /// 358 /// When enabled, the yielded [`DirEntry`] values represent the target of 359 /// the link while the path corresponds to the link. See the [`DirEntry`] 360 /// type for more details, and all future entries will be contained within 361 /// the resolved directory behind the symbolic link of the root path. 362 /// 363 /// [`DirEntry`]: struct.DirEntry.html follow_root_links(mut self, yes: bool) -> Self364 pub fn follow_root_links(mut self, yes: bool) -> Self { 365 self.opts.follow_root_links = yes; 366 self 367 } 368 369 /// Set the maximum number of simultaneously open file descriptors used 370 /// by the iterator. 371 /// 372 /// `n` must be greater than or equal to `1`. If `n` is `0`, then it is set 373 /// to `1` automatically. If this is not set, then it defaults to some 374 /// reasonably low number. 375 /// 376 /// This setting has no impact on the results yielded by the iterator 377 /// (even when `n` is `1`). Instead, this setting represents a trade off 378 /// between scarce resources (file descriptors) and memory. Namely, when 379 /// the maximum number of file descriptors is reached and a new directory 380 /// needs to be opened to continue iteration, then a previous directory 381 /// handle is closed and has its unyielded entries stored in memory. In 382 /// practice, this is a satisfying trade off because it scales with respect 383 /// to the *depth* of your file tree. Therefore, low values (even `1`) are 384 /// acceptable. 385 /// 386 /// Note that this value does not impact the number of system calls made by 387 /// an exhausted iterator. 388 /// 389 /// # Platform behavior 390 /// 391 /// On Windows, if `follow_links` is enabled, then this limit is not 392 /// respected. In particular, the maximum number of file descriptors opened 393 /// is proportional to the depth of the directory tree traversed. max_open(mut self, mut n: usize) -> Self394 pub fn max_open(mut self, mut n: usize) -> Self { 395 if n == 0 { 396 n = 1; 397 } 398 self.opts.max_open = n; 399 self 400 } 401 402 /// Set a function for sorting directory entries with a comparator 403 /// function. 404 /// 405 /// If a compare function is set, the resulting iterator will return all 406 /// paths in sorted order. The compare function will be called to compare 407 /// entries from the same directory. 408 /// 409 /// ```rust,no_run 410 /// use std::cmp; 411 /// use std::ffi::OsString; 412 /// use walkdir::WalkDir; 413 /// 414 /// WalkDir::new("foo").sort_by(|a,b| a.file_name().cmp(b.file_name())); 415 /// ``` sort_by<F>(mut self, cmp: F) -> Self where F: FnMut(&DirEntry, &DirEntry) -> Ordering + Send + Sync + 'static,416 pub fn sort_by<F>(mut self, cmp: F) -> Self 417 where 418 F: FnMut(&DirEntry, &DirEntry) -> Ordering + Send + Sync + 'static, 419 { 420 self.opts.sorter = Some(Box::new(cmp)); 421 self 422 } 423 424 /// Set a function for sorting directory entries with a key extraction 425 /// function. 426 /// 427 /// If a compare function is set, the resulting iterator will return all 428 /// paths in sorted order. The compare function will be called to compare 429 /// entries from the same directory. 430 /// 431 /// ```rust,no_run 432 /// use std::cmp; 433 /// use std::ffi::OsString; 434 /// use walkdir::WalkDir; 435 /// 436 /// WalkDir::new("foo").sort_by_key(|a| a.file_name().to_owned()); 437 /// ``` sort_by_key<K, F>(self, mut cmp: F) -> Self where F: FnMut(&DirEntry) -> K + Send + Sync + 'static, K: Ord,438 pub fn sort_by_key<K, F>(self, mut cmp: F) -> Self 439 where 440 F: FnMut(&DirEntry) -> K + Send + Sync + 'static, 441 K: Ord, 442 { 443 self.sort_by(move |a, b| cmp(a).cmp(&cmp(b))) 444 } 445 446 /// Sort directory entries by file name, to ensure a deterministic order. 447 /// 448 /// This is a convenience function for calling `Self::sort_by()`. 449 /// 450 /// ```rust,no_run 451 /// use walkdir::WalkDir; 452 /// 453 /// WalkDir::new("foo").sort_by_file_name(); 454 /// ``` sort_by_file_name(self) -> Self455 pub fn sort_by_file_name(self) -> Self { 456 self.sort_by(|a, b| a.file_name().cmp(b.file_name())) 457 } 458 459 /// Yield a directory's contents before the directory itself. By default, 460 /// this is disabled. 461 /// 462 /// When `yes` is `false` (as is the default), the directory is yielded 463 /// before its contents are read. This is useful when, e.g. you want to 464 /// skip processing of some directories. 465 /// 466 /// When `yes` is `true`, the iterator yields the contents of a directory 467 /// before yielding the directory itself. This is useful when, e.g. you 468 /// want to recursively delete a directory. 469 /// 470 /// # Example 471 /// 472 /// Assume the following directory tree: 473 /// 474 /// ```text 475 /// foo/ 476 /// abc/ 477 /// qrs 478 /// tuv 479 /// def/ 480 /// ``` 481 /// 482 /// With contents_first disabled (the default), the following code visits 483 /// the directory tree in depth-first order: 484 /// 485 /// ```no_run 486 /// use walkdir::WalkDir; 487 /// 488 /// for entry in WalkDir::new("foo") { 489 /// let entry = entry.unwrap(); 490 /// println!("{}", entry.path().display()); 491 /// } 492 /// 493 /// // foo 494 /// // foo/abc 495 /// // foo/abc/qrs 496 /// // foo/abc/tuv 497 /// // foo/def 498 /// ``` 499 /// 500 /// With contents_first enabled: 501 /// 502 /// ```no_run 503 /// use walkdir::WalkDir; 504 /// 505 /// for entry in WalkDir::new("foo").contents_first(true) { 506 /// let entry = entry.unwrap(); 507 /// println!("{}", entry.path().display()); 508 /// } 509 /// 510 /// // foo/abc/qrs 511 /// // foo/abc/tuv 512 /// // foo/abc 513 /// // foo/def 514 /// // foo 515 /// ``` contents_first(mut self, yes: bool) -> Self516 pub fn contents_first(mut self, yes: bool) -> Self { 517 self.opts.contents_first = yes; 518 self 519 } 520 521 /// Do not cross file system boundaries. 522 /// 523 /// When this option is enabled, directory traversal will not descend into 524 /// directories that are on a different file system from the root path. 525 /// 526 /// Currently, this option is only supported on Unix and Windows. If this 527 /// option is used on an unsupported platform, then directory traversal 528 /// will immediately return an error and will not yield any entries. same_file_system(mut self, yes: bool) -> Self529 pub fn same_file_system(mut self, yes: bool) -> Self { 530 self.opts.same_file_system = yes; 531 self 532 } 533 } 534 535 impl IntoIterator for WalkDir { 536 type Item = Result<DirEntry>; 537 type IntoIter = IntoIter; 538 into_iter(self) -> IntoIter539 fn into_iter(self) -> IntoIter { 540 IntoIter { 541 opts: self.opts, 542 start: Some(self.root), 543 stack_list: vec![], 544 stack_path: vec![], 545 oldest_opened: 0, 546 depth: 0, 547 deferred_dirs: vec![], 548 root_device: None, 549 } 550 } 551 } 552 553 /// An iterator for recursively descending into a directory. 554 /// 555 /// A value with this type must be constructed with the [`WalkDir`] type, which 556 /// uses a builder pattern to set options such as min/max depth, max open file 557 /// descriptors and whether the iterator should follow symbolic links. After 558 /// constructing a `WalkDir`, call [`.into_iter()`] at the end of the chain. 559 /// 560 /// The order of elements yielded by this iterator is unspecified. 561 /// 562 /// [`WalkDir`]: struct.WalkDir.html 563 /// [`.into_iter()`]: struct.WalkDir.html#into_iter.v 564 #[derive(Debug)] 565 pub struct IntoIter { 566 /// Options specified in the builder. Depths, max fds, etc. 567 opts: WalkDirOptions, 568 /// The start path. 569 /// 570 /// This is only `Some(...)` at the beginning. After the first iteration, 571 /// this is always `None`. 572 start: Option<PathBuf>, 573 /// A stack of open (up to max fd) or closed handles to directories. 574 /// An open handle is a plain [`fs::ReadDir`] while a closed handle is 575 /// a `Vec<fs::DirEntry>` corresponding to the as-of-yet consumed entries. 576 /// 577 /// [`fs::ReadDir`]: https://doc.rust-lang.org/stable/std/fs/struct.ReadDir.html 578 stack_list: Vec<DirList>, 579 /// A stack of file paths. 580 /// 581 /// This is *only* used when [`follow_links`] is enabled. In all other 582 /// cases this stack is empty. 583 /// 584 /// [`follow_links`]: struct.WalkDir.html#method.follow_links 585 stack_path: Vec<Ancestor>, 586 /// An index into `stack_list` that points to the oldest open directory 587 /// handle. If the maximum fd limit is reached and a new directory needs to 588 /// be read, the handle at this index is closed before the new directory is 589 /// opened. 590 oldest_opened: usize, 591 /// The current depth of iteration (the length of the stack at the 592 /// beginning of each iteration). 593 depth: usize, 594 /// A list of DirEntries corresponding to directories, that are 595 /// yielded after their contents has been fully yielded. This is only 596 /// used when `contents_first` is enabled. 597 deferred_dirs: Vec<DirEntry>, 598 /// The device of the root file path when the first call to `next` was 599 /// made. 600 /// 601 /// If the `same_file_system` option isn't enabled, then this is always 602 /// `None`. Conversely, if it is enabled, this is always `Some(...)` after 603 /// handling the root path. 604 root_device: Option<u64>, 605 } 606 607 /// An ancestor is an item in the directory tree traversed by walkdir, and is 608 /// used to check for loops in the tree when traversing symlinks. 609 #[derive(Debug)] 610 struct Ancestor { 611 /// The path of this ancestor. 612 path: PathBuf, 613 /// An open file to this ancesor. This is only used on Windows where 614 /// opening a file handle appears to be quite expensive, so we choose to 615 /// cache it. This comes at the cost of not respecting the file descriptor 616 /// limit set by the user. 617 #[cfg(windows)] 618 handle: Handle, 619 } 620 621 impl Ancestor { 622 /// Create a new ancestor from the given directory path. 623 #[cfg(windows)] new(dent: &DirEntry) -> io::Result<Ancestor>624 fn new(dent: &DirEntry) -> io::Result<Ancestor> { 625 let handle = Handle::from_path(dent.path())?; 626 Ok(Ancestor { path: dent.path().to_path_buf(), handle }) 627 } 628 629 /// Create a new ancestor from the given directory path. 630 #[cfg(not(windows))] new(dent: &DirEntry) -> io::Result<Ancestor>631 fn new(dent: &DirEntry) -> io::Result<Ancestor> { 632 Ok(Ancestor { path: dent.path().to_path_buf() }) 633 } 634 635 /// Returns true if and only if the given open file handle corresponds to 636 /// the same directory as this ancestor. 637 #[cfg(windows)] is_same(&self, child: &Handle) -> io::Result<bool>638 fn is_same(&self, child: &Handle) -> io::Result<bool> { 639 Ok(child == &self.handle) 640 } 641 642 /// Returns true if and only if the given open file handle corresponds to 643 /// the same directory as this ancestor. 644 #[cfg(not(windows))] is_same(&self, child: &Handle) -> io::Result<bool>645 fn is_same(&self, child: &Handle) -> io::Result<bool> { 646 Ok(child == &Handle::from_path(&self.path)?) 647 } 648 } 649 650 /// A sequence of unconsumed directory entries. 651 /// 652 /// This represents the opened or closed state of a directory handle. When 653 /// open, future entries are read by iterating over the raw `fs::ReadDir`. 654 /// When closed, all future entries are read into memory. Iteration then 655 /// proceeds over a [`Vec<fs::DirEntry>`]. 656 /// 657 /// [`fs::ReadDir`]: https://doc.rust-lang.org/stable/std/fs/struct.ReadDir.html 658 /// [`Vec<fs::DirEntry>`]: https://doc.rust-lang.org/stable/std/vec/struct.Vec.html 659 #[derive(Debug)] 660 enum DirList { 661 /// An opened handle. 662 /// 663 /// This includes the depth of the handle itself. 664 /// 665 /// If there was an error with the initial [`fs::read_dir`] call, then it 666 /// is stored here. (We use an [`Option<...>`] to make yielding the error 667 /// exactly once simpler.) 668 /// 669 /// [`fs::read_dir`]: https://doc.rust-lang.org/stable/std/fs/fn.read_dir.html 670 /// [`Option<...>`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html 671 Opened { depth: usize, it: result::Result<ReadDir, Option<Error>> }, 672 /// A closed handle. 673 /// 674 /// All remaining directory entries are read into memory. 675 Closed(vec::IntoIter<Result<DirEntry>>), 676 } 677 678 impl Iterator for IntoIter { 679 type Item = Result<DirEntry>; 680 /// Advances the iterator and returns the next value. 681 /// 682 /// # Errors 683 /// 684 /// If the iterator fails to retrieve the next value, this method returns 685 /// an error value. The error will be wrapped in an Option::Some. next(&mut self) -> Option<Result<DirEntry>>686 fn next(&mut self) -> Option<Result<DirEntry>> { 687 if let Some(start) = self.start.take() { 688 if self.opts.same_file_system { 689 let result = util::device_num(&start) 690 .map_err(|e| Error::from_path(0, start.clone(), e)); 691 self.root_device = Some(itry!(result)); 692 } 693 let dent = itry!(DirEntry::from_path(0, start, false)); 694 if let Some(result) = self.handle_entry(dent) { 695 return Some(result); 696 } 697 } 698 while !self.stack_list.is_empty() { 699 self.depth = self.stack_list.len(); 700 if let Some(dentry) = self.get_deferred_dir() { 701 return Some(Ok(dentry)); 702 } 703 if self.depth > self.opts.max_depth { 704 // If we've exceeded the max depth, pop the current dir 705 // so that we don't descend. 706 self.pop(); 707 continue; 708 } 709 // Unwrap is safe here because we've verified above that 710 // `self.stack_list` is not empty 711 let next = self 712 .stack_list 713 .last_mut() 714 .expect("BUG: stack should be non-empty") 715 .next(); 716 match next { 717 None => self.pop(), 718 Some(Err(err)) => return Some(Err(err)), 719 Some(Ok(dent)) => { 720 if let Some(result) = self.handle_entry(dent) { 721 return Some(result); 722 } 723 } 724 } 725 } 726 if self.opts.contents_first { 727 self.depth = self.stack_list.len(); 728 if let Some(dentry) = self.get_deferred_dir() { 729 return Some(Ok(dentry)); 730 } 731 } 732 None 733 } 734 } 735 736 impl IntoIter { 737 /// Skips the current directory. 738 /// 739 /// This causes the iterator to stop traversing the contents of the least 740 /// recently yielded directory. This means any remaining entries in that 741 /// directory will be skipped (including sub-directories). 742 /// 743 /// Note that the ergonomics of this method are questionable since it 744 /// borrows the iterator mutably. Namely, you must write out the looping 745 /// condition manually. For example, to skip hidden entries efficiently on 746 /// unix systems: 747 /// 748 /// ```no_run 749 /// use walkdir::{DirEntry, WalkDir}; 750 /// 751 /// fn is_hidden(entry: &DirEntry) -> bool { 752 /// entry.file_name() 753 /// .to_str() 754 /// .map(|s| s.starts_with(".")) 755 /// .unwrap_or(false) 756 /// } 757 /// 758 /// let mut it = WalkDir::new("foo").into_iter(); 759 /// loop { 760 /// let entry = match it.next() { 761 /// None => break, 762 /// Some(Err(err)) => panic!("ERROR: {}", err), 763 /// Some(Ok(entry)) => entry, 764 /// }; 765 /// if is_hidden(&entry) { 766 /// if entry.file_type().is_dir() { 767 /// it.skip_current_dir(); 768 /// } 769 /// continue; 770 /// } 771 /// println!("{}", entry.path().display()); 772 /// } 773 /// ``` 774 /// 775 /// You may find it more convenient to use the [`filter_entry`] iterator 776 /// adapter. (See its documentation for the same example functionality as 777 /// above.) 778 /// 779 /// [`filter_entry`]: #method.filter_entry skip_current_dir(&mut self)780 pub fn skip_current_dir(&mut self) { 781 if !self.stack_list.is_empty() { 782 self.pop(); 783 } 784 } 785 786 /// Yields only entries which satisfy the given predicate and skips 787 /// descending into directories that do not satisfy the given predicate. 788 /// 789 /// The predicate is applied to all entries. If the predicate is 790 /// true, iteration carries on as normal. If the predicate is false, the 791 /// entry is ignored and if it is a directory, it is not descended into. 792 /// 793 /// This is often more convenient to use than [`skip_current_dir`]. For 794 /// example, to skip hidden files and directories efficiently on unix 795 /// systems: 796 /// 797 /// ```no_run 798 /// use walkdir::{DirEntry, WalkDir}; 799 /// # use walkdir::Error; 800 /// 801 /// fn is_hidden(entry: &DirEntry) -> bool { 802 /// entry.file_name() 803 /// .to_str() 804 /// .map(|s| s.starts_with(".")) 805 /// .unwrap_or(false) 806 /// } 807 /// 808 /// # fn try_main() -> Result<(), Error> { 809 /// for entry in WalkDir::new("foo") 810 /// .into_iter() 811 /// .filter_entry(|e| !is_hidden(e)) { 812 /// println!("{}", entry?.path().display()); 813 /// } 814 /// # Ok(()) 815 /// # } 816 /// ``` 817 /// 818 /// Note that the iterator will still yield errors for reading entries that 819 /// may not satisfy the predicate. 820 /// 821 /// Note that entries skipped with [`min_depth`] and [`max_depth`] are not 822 /// passed to this predicate. 823 /// 824 /// Note that if the iterator has `contents_first` enabled, then this 825 /// method is no different than calling the standard `Iterator::filter` 826 /// method (because directory entries are yielded after they've been 827 /// descended into). 828 /// 829 /// [`skip_current_dir`]: #method.skip_current_dir 830 /// [`min_depth`]: struct.WalkDir.html#method.min_depth 831 /// [`max_depth`]: struct.WalkDir.html#method.max_depth filter_entry<P>(self, predicate: P) -> FilterEntry<Self, P> where P: FnMut(&DirEntry) -> bool,832 pub fn filter_entry<P>(self, predicate: P) -> FilterEntry<Self, P> 833 where 834 P: FnMut(&DirEntry) -> bool, 835 { 836 FilterEntry { it: self, predicate } 837 } 838 handle_entry( &mut self, mut dent: DirEntry, ) -> Option<Result<DirEntry>>839 fn handle_entry( 840 &mut self, 841 mut dent: DirEntry, 842 ) -> Option<Result<DirEntry>> { 843 if self.opts.follow_links && dent.file_type().is_symlink() { 844 dent = itry!(self.follow(dent)); 845 } 846 let is_normal_dir = !dent.file_type().is_symlink() && dent.is_dir(); 847 if is_normal_dir { 848 if self.opts.same_file_system && dent.depth() > 0 { 849 if itry!(self.is_same_file_system(&dent)) { 850 itry!(self.push(&dent)); 851 } 852 } else { 853 itry!(self.push(&dent)); 854 } 855 } else if dent.depth() == 0 856 && dent.file_type().is_symlink() 857 && self.opts.follow_root_links 858 { 859 // As a special case, if we are processing a root entry, then we 860 // always follow it even if it's a symlink and follow_links is 861 // false. We are careful to not let this change the semantics of 862 // the DirEntry however. Namely, the DirEntry should still respect 863 // the follow_links setting. When it's disabled, it should report 864 // itself as a symlink. When it's enabled, it should always report 865 // itself as the target. 866 let md = itry!(fs::metadata(dent.path()).map_err(|err| { 867 Error::from_path(dent.depth(), dent.path().to_path_buf(), err) 868 })); 869 if md.file_type().is_dir() { 870 itry!(self.push(&dent)); 871 } 872 } 873 if is_normal_dir && self.opts.contents_first { 874 self.deferred_dirs.push(dent); 875 None 876 } else if self.skippable() { 877 None 878 } else { 879 Some(Ok(dent)) 880 } 881 } 882 get_deferred_dir(&mut self) -> Option<DirEntry>883 fn get_deferred_dir(&mut self) -> Option<DirEntry> { 884 if self.opts.contents_first { 885 if self.depth < self.deferred_dirs.len() { 886 // Unwrap is safe here because we've guaranteed that 887 // `self.deferred_dirs.len()` can never be less than 1 888 let deferred: DirEntry = self 889 .deferred_dirs 890 .pop() 891 .expect("BUG: deferred_dirs should be non-empty"); 892 if !self.skippable() { 893 return Some(deferred); 894 } 895 } 896 } 897 None 898 } 899 push(&mut self, dent: &DirEntry) -> Result<()>900 fn push(&mut self, dent: &DirEntry) -> Result<()> { 901 // Make room for another open file descriptor if we've hit the max. 902 let free = 903 self.stack_list.len().checked_sub(self.oldest_opened).unwrap(); 904 if free == self.opts.max_open { 905 self.stack_list[self.oldest_opened].close(); 906 } 907 // Open a handle to reading the directory's entries. 908 let rd = fs::read_dir(dent.path()).map_err(|err| { 909 Some(Error::from_path(self.depth, dent.path().to_path_buf(), err)) 910 }); 911 let mut list = DirList::Opened { depth: self.depth, it: rd }; 912 if let Some(ref mut cmp) = self.opts.sorter { 913 let mut entries: Vec<_> = list.collect(); 914 entries.sort_by(|a, b| match (a, b) { 915 (&Ok(ref a), &Ok(ref b)) => cmp(a, b), 916 (&Err(_), &Err(_)) => Ordering::Equal, 917 (&Ok(_), &Err(_)) => Ordering::Greater, 918 (&Err(_), &Ok(_)) => Ordering::Less, 919 }); 920 list = DirList::Closed(entries.into_iter()); 921 } 922 if self.opts.follow_links { 923 let ancestor = Ancestor::new(&dent) 924 .map_err(|err| Error::from_io(self.depth, err))?; 925 self.stack_path.push(ancestor); 926 } 927 // We push this after stack_path since creating the Ancestor can fail. 928 // If it fails, then we return the error and won't descend. 929 self.stack_list.push(list); 930 // If we had to close out a previous directory stream, then we need to 931 // increment our index the oldest still-open stream. We do this only 932 // after adding to our stack, in order to ensure that the oldest_opened 933 // index remains valid. The worst that can happen is that an already 934 // closed stream will be closed again, which is a no-op. 935 // 936 // We could move the close of the stream above into this if-body, but 937 // then we would have more than the maximum number of file descriptors 938 // open at a particular point in time. 939 if free == self.opts.max_open { 940 // Unwrap is safe here because self.oldest_opened is guaranteed to 941 // never be greater than `self.stack_list.len()`, which implies 942 // that the subtraction won't underflow and that adding 1 will 943 // never overflow. 944 self.oldest_opened = self.oldest_opened.checked_add(1).unwrap(); 945 } 946 Ok(()) 947 } 948 pop(&mut self)949 fn pop(&mut self) { 950 self.stack_list.pop().expect("BUG: cannot pop from empty stack"); 951 if self.opts.follow_links { 952 self.stack_path.pop().expect("BUG: list/path stacks out of sync"); 953 } 954 // If everything in the stack is already closed, then there is 955 // room for at least one more open descriptor and it will 956 // always be at the top of the stack. 957 self.oldest_opened = min(self.oldest_opened, self.stack_list.len()); 958 } 959 follow(&self, mut dent: DirEntry) -> Result<DirEntry>960 fn follow(&self, mut dent: DirEntry) -> Result<DirEntry> { 961 dent = 962 DirEntry::from_path(self.depth, dent.path().to_path_buf(), true)?; 963 // The only way a symlink can cause a loop is if it points 964 // to a directory. Otherwise, it always points to a leaf 965 // and we can omit any loop checks. 966 if dent.is_dir() { 967 self.check_loop(dent.path())?; 968 } 969 Ok(dent) 970 } 971 check_loop<P: AsRef<Path>>(&self, child: P) -> Result<()>972 fn check_loop<P: AsRef<Path>>(&self, child: P) -> Result<()> { 973 let hchild = Handle::from_path(&child) 974 .map_err(|err| Error::from_io(self.depth, err))?; 975 for ancestor in self.stack_path.iter().rev() { 976 let is_same = ancestor 977 .is_same(&hchild) 978 .map_err(|err| Error::from_io(self.depth, err))?; 979 if is_same { 980 return Err(Error::from_loop( 981 self.depth, 982 &ancestor.path, 983 child.as_ref(), 984 )); 985 } 986 } 987 Ok(()) 988 } 989 is_same_file_system(&mut self, dent: &DirEntry) -> Result<bool>990 fn is_same_file_system(&mut self, dent: &DirEntry) -> Result<bool> { 991 let dent_device = util::device_num(dent.path()) 992 .map_err(|err| Error::from_entry(dent, err))?; 993 Ok(self 994 .root_device 995 .map(|d| d == dent_device) 996 .expect("BUG: called is_same_file_system without root device")) 997 } 998 skippable(&self) -> bool999 fn skippable(&self) -> bool { 1000 self.depth < self.opts.min_depth || self.depth > self.opts.max_depth 1001 } 1002 } 1003 1004 impl DirList { close(&mut self)1005 fn close(&mut self) { 1006 if let DirList::Opened { .. } = *self { 1007 *self = DirList::Closed(self.collect::<Vec<_>>().into_iter()); 1008 } 1009 } 1010 } 1011 1012 impl Iterator for DirList { 1013 type Item = Result<DirEntry>; 1014 1015 #[inline(always)] next(&mut self) -> Option<Result<DirEntry>>1016 fn next(&mut self) -> Option<Result<DirEntry>> { 1017 match *self { 1018 DirList::Closed(ref mut it) => it.next(), 1019 DirList::Opened { depth, ref mut it } => match *it { 1020 Err(ref mut err) => err.take().map(Err), 1021 Ok(ref mut rd) => rd.next().map(|r| match r { 1022 Ok(r) => DirEntry::from_entry(depth + 1, &r), 1023 Err(err) => Err(Error::from_io(depth + 1, err)), 1024 }), 1025 }, 1026 } 1027 } 1028 } 1029 1030 /// A recursive directory iterator that skips entries. 1031 /// 1032 /// Values of this type are created by calling [`.filter_entry()`] on an 1033 /// `IntoIter`, which is formed by calling [`.into_iter()`] on a `WalkDir`. 1034 /// 1035 /// Directories that fail the predicate `P` are skipped. Namely, they are 1036 /// never yielded and never descended into. 1037 /// 1038 /// Entries that are skipped with the [`min_depth`] and [`max_depth`] options 1039 /// are not passed through this filter. 1040 /// 1041 /// If opening a handle to a directory resulted in an error, then it is yielded 1042 /// and no corresponding call to the predicate is made. 1043 /// 1044 /// Type parameter `I` refers to the underlying iterator and `P` refers to the 1045 /// predicate, which is usually `FnMut(&DirEntry) -> bool`. 1046 /// 1047 /// [`.filter_entry()`]: struct.IntoIter.html#method.filter_entry 1048 /// [`.into_iter()`]: struct.WalkDir.html#into_iter.v 1049 /// [`min_depth`]: struct.WalkDir.html#method.min_depth 1050 /// [`max_depth`]: struct.WalkDir.html#method.max_depth 1051 #[derive(Debug)] 1052 pub struct FilterEntry<I, P> { 1053 it: I, 1054 predicate: P, 1055 } 1056 1057 impl<P> Iterator for FilterEntry<IntoIter, P> 1058 where 1059 P: FnMut(&DirEntry) -> bool, 1060 { 1061 type Item = Result<DirEntry>; 1062 1063 /// Advances the iterator and returns the next value. 1064 /// 1065 /// # Errors 1066 /// 1067 /// If the iterator fails to retrieve the next value, this method returns 1068 /// an error value. The error will be wrapped in an `Option::Some`. next(&mut self) -> Option<Result<DirEntry>>1069 fn next(&mut self) -> Option<Result<DirEntry>> { 1070 loop { 1071 let dent = match self.it.next() { 1072 None => return None, 1073 Some(result) => itry!(result), 1074 }; 1075 if !(self.predicate)(&dent) { 1076 if dent.is_dir() { 1077 self.it.skip_current_dir(); 1078 } 1079 continue; 1080 } 1081 return Some(Ok(dent)); 1082 } 1083 } 1084 } 1085 1086 impl<P> FilterEntry<IntoIter, P> 1087 where 1088 P: FnMut(&DirEntry) -> bool, 1089 { 1090 /// Yields only entries which satisfy the given predicate and skips 1091 /// descending into directories that do not satisfy the given predicate. 1092 /// 1093 /// The predicate is applied to all entries. If the predicate is 1094 /// true, iteration carries on as normal. If the predicate is false, the 1095 /// entry is ignored and if it is a directory, it is not descended into. 1096 /// 1097 /// This is often more convenient to use than [`skip_current_dir`]. For 1098 /// example, to skip hidden files and directories efficiently on unix 1099 /// systems: 1100 /// 1101 /// ```no_run 1102 /// use walkdir::{DirEntry, WalkDir}; 1103 /// # use walkdir::Error; 1104 /// 1105 /// fn is_hidden(entry: &DirEntry) -> bool { 1106 /// entry.file_name() 1107 /// .to_str() 1108 /// .map(|s| s.starts_with(".")) 1109 /// .unwrap_or(false) 1110 /// } 1111 /// 1112 /// # fn try_main() -> Result<(), Error> { 1113 /// for entry in WalkDir::new("foo") 1114 /// .into_iter() 1115 /// .filter_entry(|e| !is_hidden(e)) { 1116 /// println!("{}", entry?.path().display()); 1117 /// } 1118 /// # Ok(()) 1119 /// # } 1120 /// ``` 1121 /// 1122 /// Note that the iterator will still yield errors for reading entries that 1123 /// may not satisfy the predicate. 1124 /// 1125 /// Note that entries skipped with [`min_depth`] and [`max_depth`] are not 1126 /// passed to this predicate. 1127 /// 1128 /// Note that if the iterator has `contents_first` enabled, then this 1129 /// method is no different than calling the standard `Iterator::filter` 1130 /// method (because directory entries are yielded after they've been 1131 /// descended into). 1132 /// 1133 /// [`skip_current_dir`]: #method.skip_current_dir 1134 /// [`min_depth`]: struct.WalkDir.html#method.min_depth 1135 /// [`max_depth`]: struct.WalkDir.html#method.max_depth filter_entry(self, predicate: P) -> FilterEntry<Self, P>1136 pub fn filter_entry(self, predicate: P) -> FilterEntry<Self, P> { 1137 FilterEntry { it: self, predicate } 1138 } 1139 1140 /// Skips the current directory. 1141 /// 1142 /// This causes the iterator to stop traversing the contents of the least 1143 /// recently yielded directory. This means any remaining entries in that 1144 /// directory will be skipped (including sub-directories). 1145 /// 1146 /// Note that the ergonomics of this method are questionable since it 1147 /// borrows the iterator mutably. Namely, you must write out the looping 1148 /// condition manually. For example, to skip hidden entries efficiently on 1149 /// unix systems: 1150 /// 1151 /// ```no_run 1152 /// use walkdir::{DirEntry, WalkDir}; 1153 /// 1154 /// fn is_hidden(entry: &DirEntry) -> bool { 1155 /// entry.file_name() 1156 /// .to_str() 1157 /// .map(|s| s.starts_with(".")) 1158 /// .unwrap_or(false) 1159 /// } 1160 /// 1161 /// let mut it = WalkDir::new("foo").into_iter(); 1162 /// loop { 1163 /// let entry = match it.next() { 1164 /// None => break, 1165 /// Some(Err(err)) => panic!("ERROR: {}", err), 1166 /// Some(Ok(entry)) => entry, 1167 /// }; 1168 /// if is_hidden(&entry) { 1169 /// if entry.file_type().is_dir() { 1170 /// it.skip_current_dir(); 1171 /// } 1172 /// continue; 1173 /// } 1174 /// println!("{}", entry.path().display()); 1175 /// } 1176 /// ``` 1177 /// 1178 /// You may find it more convenient to use the [`filter_entry`] iterator 1179 /// adapter. (See its documentation for the same example functionality as 1180 /// above.) 1181 /// 1182 /// [`filter_entry`]: #method.filter_entry skip_current_dir(&mut self)1183 pub fn skip_current_dir(&mut self) { 1184 self.it.skip_current_dir(); 1185 } 1186 } 1187