1 /*!
2 Crate `walkdir` provides an efficient and cross platform implementation
3 of recursive directory traversal. Several options are exposed to control
4 iteration, such as whether to follow symbolic links (default off), limit the
5 maximum number of simultaneous open file descriptors and the ability to
6 efficiently skip descending into directories.
7 
8 To use this crate, add `walkdir` as a dependency to your project's
9 `Cargo.toml`:
10 
11 ```toml
12 [dependencies]
13 walkdir = "2"
14 ```
15 
16 # From the top
17 
18 The [`WalkDir`] type builds iterators. The [`DirEntry`] type describes values
19 yielded by the iterator. Finally, the [`Error`] type is a small wrapper around
20 [`std::io::Error`] with additional information, such as if a loop was detected
21 while following symbolic links (not enabled by default).
22 
23 [`WalkDir`]: struct.WalkDir.html
24 [`DirEntry`]: struct.DirEntry.html
25 [`Error`]: struct.Error.html
26 [`std::io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html
27 
28 # Example
29 
30 The following code recursively iterates over the directory given and prints
31 the path for each entry:
32 
33 ```no_run
34 use walkdir::WalkDir;
35 # use walkdir::Error;
36 
37 # fn try_main() -> Result<(), Error> {
38 for entry in WalkDir::new("foo") {
39     println!("{}", entry?.path().display());
40 }
41 # Ok(())
42 # }
43 ```
44 
45 Or, if you'd like to iterate over all entries and ignore any errors that
46 may arise, use [`filter_map`]. (e.g., This code below will silently skip
47 directories that the owner of the running process does not have permission to
48 access.)
49 
50 ```no_run
51 use walkdir::WalkDir;
52 
53 for entry in WalkDir::new("foo").into_iter().filter_map(|e| e.ok()) {
54     println!("{}", entry.path().display());
55 }
56 ```
57 
58 [`filter_map`]: https://doc.rust-lang.org/stable/std/iter/trait.Iterator.html#method.filter_map
59 
60 # Example: follow symbolic links
61 
62 The same code as above, except [`follow_links`] is enabled:
63 
64 ```no_run
65 use walkdir::WalkDir;
66 # use walkdir::Error;
67 
68 # fn try_main() -> Result<(), Error> {
69 for entry in WalkDir::new("foo").follow_links(true) {
70     println!("{}", entry?.path().display());
71 }
72 # Ok(())
73 # }
74 ```
75 
76 [`follow_links`]: struct.WalkDir.html#method.follow_links
77 
78 # Example: skip hidden files and directories on unix
79 
80 This uses the [`filter_entry`] iterator adapter to avoid yielding hidden files
81 and directories efficiently (i.e. without recursing into hidden directories):
82 
83 ```no_run
84 use walkdir::{DirEntry, WalkDir};
85 # use walkdir::Error;
86 
87 fn is_hidden(entry: &DirEntry) -> bool {
88     entry.file_name()
89          .to_str()
90          .map(|s| s.starts_with("."))
91          .unwrap_or(false)
92 }
93 
94 # fn try_main() -> Result<(), Error> {
95 let walker = WalkDir::new("foo").into_iter();
96 for entry in walker.filter_entry(|e| !is_hidden(e)) {
97     println!("{}", entry?.path().display());
98 }
99 # Ok(())
100 # }
101 ```
102 
103 [`filter_entry`]: struct.IntoIter.html#method.filter_entry
104 */
105 
106 #![deny(missing_docs)]
107 #![allow(unknown_lints)]
108 
109 #[cfg(doctest)]
110 doc_comment::doctest!("../README.md");
111 
112 use std::cmp::{min, Ordering};
113 use std::fmt;
114 use std::fs::{self, ReadDir};
115 use std::io;
116 use std::path::{Path, PathBuf};
117 use std::result;
118 use std::vec;
119 
120 use same_file::Handle;
121 
122 pub use crate::dent::DirEntry;
123 #[cfg(unix)]
124 pub use crate::dent::DirEntryExt;
125 pub use crate::error::Error;
126 
127 mod dent;
128 mod error;
129 #[cfg(test)]
130 mod tests;
131 mod util;
132 
133 /// Like try, but for iterators that return [`Option<Result<_, _>>`].
134 ///
135 /// [`Option<Result<_, _>>`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html
136 macro_rules! itry {
137     ($e:expr) => {
138         match $e {
139             Ok(v) => v,
140             Err(err) => return Some(Err(From::from(err))),
141         }
142     };
143 }
144 
145 /// A result type for walkdir operations.
146 ///
147 /// Note that this result type embeds the error type in this crate. This
148 /// is only useful if you care about the additional information provided by
149 /// the error (such as the path associated with the error or whether a loop
150 /// was dectected). If you want things to Just Work, then you can use
151 /// [`io::Result`] instead since the error type in this package will
152 /// automatically convert to an [`io::Result`] when using the [`try!`] macro.
153 ///
154 /// [`io::Result`]: https://doc.rust-lang.org/stable/std/io/type.Result.html
155 /// [`try!`]: https://doc.rust-lang.org/stable/std/macro.try.html
156 pub type Result<T> = ::std::result::Result<T, Error>;
157 
158 /// A builder to create an iterator for recursively walking a directory.
159 ///
160 /// Results are returned in depth first fashion, with directories yielded
161 /// before their contents. If [`contents_first`] is true, contents are yielded
162 /// before their directories. The order is unspecified but if [`sort_by`] is
163 /// given, directory entries are sorted according to this function. Directory
164 /// entries `.` and `..` are always omitted.
165 ///
166 /// If an error occurs at any point during iteration, then it is returned in
167 /// place of its corresponding directory entry and iteration continues as
168 /// normal. If an error occurs while opening a directory for reading, then it
169 /// is not descended into (but the error is still yielded by the iterator).
170 /// Iteration may be stopped at any time. When the iterator is destroyed, all
171 /// resources associated with it are freed.
172 ///
173 /// [`contents_first`]: struct.WalkDir.html#method.contents_first
174 /// [`sort_by`]: struct.WalkDir.html#method.sort_by
175 ///
176 /// # Usage
177 ///
178 /// This type implements [`IntoIterator`] so that it may be used as the subject
179 /// of a `for` loop. You may need to call [`into_iter`] explicitly if you want
180 /// to use iterator adapters such as [`filter_entry`].
181 ///
182 /// Idiomatic use of this type should use method chaining to set desired
183 /// options. For example, this only shows entries with a depth of `1`, `2` or
184 /// `3` (relative to `foo`):
185 ///
186 /// ```no_run
187 /// use walkdir::WalkDir;
188 /// # use walkdir::Error;
189 ///
190 /// # fn try_main() -> Result<(), Error> {
191 /// for entry in WalkDir::new("foo").min_depth(1).max_depth(3) {
192 ///     println!("{}", entry?.path().display());
193 /// }
194 /// # Ok(())
195 /// # }
196 /// ```
197 ///
198 /// [`IntoIterator`]: https://doc.rust-lang.org/stable/std/iter/trait.IntoIterator.html
199 /// [`into_iter`]: https://doc.rust-lang.org/nightly/core/iter/trait.IntoIterator.html#tymethod.into_iter
200 /// [`filter_entry`]: struct.IntoIter.html#method.filter_entry
201 ///
202 /// Note that the iterator by default includes the top-most directory. Since
203 /// this is the only directory yielded with depth `0`, it is easy to ignore it
204 /// with the [`min_depth`] setting:
205 ///
206 /// ```no_run
207 /// use walkdir::WalkDir;
208 /// # use walkdir::Error;
209 ///
210 /// # fn try_main() -> Result<(), Error> {
211 /// for entry in WalkDir::new("foo").min_depth(1) {
212 ///     println!("{}", entry?.path().display());
213 /// }
214 /// # Ok(())
215 /// # }
216 /// ```
217 ///
218 /// [`min_depth`]: struct.WalkDir.html#method.min_depth
219 ///
220 /// This will only return descendents of the `foo` directory and not `foo`
221 /// itself.
222 ///
223 /// # Loops
224 ///
225 /// This iterator (like most/all recursive directory iterators) assumes that
226 /// no loops can be made with *hard* links on your file system. In particular,
227 /// this would require creating a hard link to a directory such that it creates
228 /// a loop. On most platforms, this operation is illegal.
229 ///
230 /// Note that when following symbolic/soft links, loops are detected and an
231 /// error is reported.
232 #[derive(Debug)]
233 pub struct WalkDir {
234     opts: WalkDirOptions,
235     root: PathBuf,
236 }
237 
238 struct WalkDirOptions {
239     follow_links: bool,
240     follow_root_links: bool,
241     max_open: usize,
242     min_depth: usize,
243     max_depth: usize,
244     sorter: Option<
245         Box<
246             dyn FnMut(&DirEntry, &DirEntry) -> Ordering
247                 + Send
248                 + Sync
249                 + 'static,
250         >,
251     >,
252     contents_first: bool,
253     same_file_system: bool,
254 }
255 
256 impl fmt::Debug for WalkDirOptions {
fmt( &self, f: &mut fmt::Formatter<'_>, ) -> result::Result<(), fmt::Error>257     fn fmt(
258         &self,
259         f: &mut fmt::Formatter<'_>,
260     ) -> result::Result<(), fmt::Error> {
261         let sorter_str = if self.sorter.is_some() {
262             // FnMut isn't `Debug`
263             "Some(...)"
264         } else {
265             "None"
266         };
267         f.debug_struct("WalkDirOptions")
268             .field("follow_links", &self.follow_links)
269             .field("follow_root_link", &self.follow_root_links)
270             .field("max_open", &self.max_open)
271             .field("min_depth", &self.min_depth)
272             .field("max_depth", &self.max_depth)
273             .field("sorter", &sorter_str)
274             .field("contents_first", &self.contents_first)
275             .field("same_file_system", &self.same_file_system)
276             .finish()
277     }
278 }
279 
280 impl WalkDir {
281     /// Create a builder for a recursive directory iterator starting at the
282     /// file path `root`. If `root` is a directory, then it is the first item
283     /// yielded by the iterator. If `root` is a file, then it is the first
284     /// and only item yielded by the iterator. If `root` is a symlink, then it
285     /// is always followed for the purposes of directory traversal. (A root
286     /// `DirEntry` still obeys its documentation with respect to symlinks and
287     /// the `follow_links` setting.)
new<P: AsRef<Path>>(root: P) -> Self288     pub fn new<P: AsRef<Path>>(root: P) -> Self {
289         WalkDir {
290             opts: WalkDirOptions {
291                 follow_links: false,
292                 follow_root_links: true,
293                 max_open: 10,
294                 min_depth: 0,
295                 max_depth: ::std::usize::MAX,
296                 sorter: None,
297                 contents_first: false,
298                 same_file_system: false,
299             },
300             root: root.as_ref().to_path_buf(),
301         }
302     }
303 
304     /// Set the minimum depth of entries yielded by the iterator.
305     ///
306     /// The smallest depth is `0` and always corresponds to the path given
307     /// to the `new` function on this type. Its direct descendents have depth
308     /// `1`, and their descendents have depth `2`, and so on.
min_depth(mut self, depth: usize) -> Self309     pub fn min_depth(mut self, depth: usize) -> Self {
310         self.opts.min_depth = depth;
311         if self.opts.min_depth > self.opts.max_depth {
312             self.opts.min_depth = self.opts.max_depth;
313         }
314         self
315     }
316 
317     /// Set the maximum depth of entries yield by the iterator.
318     ///
319     /// The smallest depth is `0` and always corresponds to the path given
320     /// to the `new` function on this type. Its direct descendents have depth
321     /// `1`, and their descendents have depth `2`, and so on.
322     ///
323     /// Note that this will not simply filter the entries of the iterator, but
324     /// it will actually avoid descending into directories when the depth is
325     /// exceeded.
max_depth(mut self, depth: usize) -> Self326     pub fn max_depth(mut self, depth: usize) -> Self {
327         self.opts.max_depth = depth;
328         if self.opts.max_depth < self.opts.min_depth {
329             self.opts.max_depth = self.opts.min_depth;
330         }
331         self
332     }
333 
334     /// Follow symbolic links. By default, this is disabled.
335     ///
336     /// When `yes` is `true`, symbolic links are followed as if they were
337     /// normal directories and files. If a symbolic link is broken or is
338     /// involved in a loop, an error is yielded.
339     ///
340     /// When enabled, the yielded [`DirEntry`] values represent the target of
341     /// the link while the path corresponds to the link. See the [`DirEntry`]
342     /// type for more details.
343     ///
344     /// [`DirEntry`]: struct.DirEntry.html
follow_links(mut self, yes: bool) -> Self345     pub fn follow_links(mut self, yes: bool) -> Self {
346         self.opts.follow_links = yes;
347         self
348     }
349 
350     /// Follow symbolic links if these are the root of the traversal.
351     /// By default, this is enabled.
352     ///
353     /// When `yes` is `true`, symbolic links on root paths are followed
354     /// which is effective if the symbolic link points to a directory.
355     /// If a symbolic link is broken or is involved in a loop, an error is yielded
356     /// as the first entry of the traversal.
357     ///
358     /// When enabled, the yielded [`DirEntry`] values represent the target of
359     /// the link while the path corresponds to the link. See the [`DirEntry`]
360     /// type for more details, and all future entries will be contained within
361     /// the resolved directory behind the symbolic link of the root path.
362     ///
363     /// [`DirEntry`]: struct.DirEntry.html
follow_root_links(mut self, yes: bool) -> Self364     pub fn follow_root_links(mut self, yes: bool) -> Self {
365         self.opts.follow_root_links = yes;
366         self
367     }
368 
369     /// Set the maximum number of simultaneously open file descriptors used
370     /// by the iterator.
371     ///
372     /// `n` must be greater than or equal to `1`. If `n` is `0`, then it is set
373     /// to `1` automatically. If this is not set, then it defaults to some
374     /// reasonably low number.
375     ///
376     /// This setting has no impact on the results yielded by the iterator
377     /// (even when `n` is `1`). Instead, this setting represents a trade off
378     /// between scarce resources (file descriptors) and memory. Namely, when
379     /// the maximum number of file descriptors is reached and a new directory
380     /// needs to be opened to continue iteration, then a previous directory
381     /// handle is closed and has its unyielded entries stored in memory. In
382     /// practice, this is a satisfying trade off because it scales with respect
383     /// to the *depth* of your file tree. Therefore, low values (even `1`) are
384     /// acceptable.
385     ///
386     /// Note that this value does not impact the number of system calls made by
387     /// an exhausted iterator.
388     ///
389     /// # Platform behavior
390     ///
391     /// On Windows, if `follow_links` is enabled, then this limit is not
392     /// respected. In particular, the maximum number of file descriptors opened
393     /// is proportional to the depth of the directory tree traversed.
max_open(mut self, mut n: usize) -> Self394     pub fn max_open(mut self, mut n: usize) -> Self {
395         if n == 0 {
396             n = 1;
397         }
398         self.opts.max_open = n;
399         self
400     }
401 
402     /// Set a function for sorting directory entries with a comparator
403     /// function.
404     ///
405     /// If a compare function is set, the resulting iterator will return all
406     /// paths in sorted order. The compare function will be called to compare
407     /// entries from the same directory.
408     ///
409     /// ```rust,no_run
410     /// use std::cmp;
411     /// use std::ffi::OsString;
412     /// use walkdir::WalkDir;
413     ///
414     /// WalkDir::new("foo").sort_by(|a,b| a.file_name().cmp(b.file_name()));
415     /// ```
sort_by<F>(mut self, cmp: F) -> Self where F: FnMut(&DirEntry, &DirEntry) -> Ordering + Send + Sync + 'static,416     pub fn sort_by<F>(mut self, cmp: F) -> Self
417     where
418         F: FnMut(&DirEntry, &DirEntry) -> Ordering + Send + Sync + 'static,
419     {
420         self.opts.sorter = Some(Box::new(cmp));
421         self
422     }
423 
424     /// Set a function for sorting directory entries with a key extraction
425     /// function.
426     ///
427     /// If a compare function is set, the resulting iterator will return all
428     /// paths in sorted order. The compare function will be called to compare
429     /// entries from the same directory.
430     ///
431     /// ```rust,no_run
432     /// use std::cmp;
433     /// use std::ffi::OsString;
434     /// use walkdir::WalkDir;
435     ///
436     /// WalkDir::new("foo").sort_by_key(|a| a.file_name().to_owned());
437     /// ```
sort_by_key<K, F>(self, mut cmp: F) -> Self where F: FnMut(&DirEntry) -> K + Send + Sync + 'static, K: Ord,438     pub fn sort_by_key<K, F>(self, mut cmp: F) -> Self
439     where
440         F: FnMut(&DirEntry) -> K + Send + Sync + 'static,
441         K: Ord,
442     {
443         self.sort_by(move |a, b| cmp(a).cmp(&cmp(b)))
444     }
445 
446     /// Sort directory entries by file name, to ensure a deterministic order.
447     ///
448     /// This is a convenience function for calling `Self::sort_by()`.
449     ///
450     /// ```rust,no_run
451     /// use walkdir::WalkDir;
452     ///
453     /// WalkDir::new("foo").sort_by_file_name();
454     /// ```
sort_by_file_name(self) -> Self455     pub fn sort_by_file_name(self) -> Self {
456         self.sort_by(|a, b| a.file_name().cmp(b.file_name()))
457     }
458 
459     /// Yield a directory's contents before the directory itself. By default,
460     /// this is disabled.
461     ///
462     /// When `yes` is `false` (as is the default), the directory is yielded
463     /// before its contents are read. This is useful when, e.g. you want to
464     /// skip processing of some directories.
465     ///
466     /// When `yes` is `true`, the iterator yields the contents of a directory
467     /// before yielding the directory itself. This is useful when, e.g. you
468     /// want to recursively delete a directory.
469     ///
470     /// # Example
471     ///
472     /// Assume the following directory tree:
473     ///
474     /// ```text
475     /// foo/
476     ///   abc/
477     ///     qrs
478     ///     tuv
479     ///   def/
480     /// ```
481     ///
482     /// With contents_first disabled (the default), the following code visits
483     /// the directory tree in depth-first order:
484     ///
485     /// ```no_run
486     /// use walkdir::WalkDir;
487     ///
488     /// for entry in WalkDir::new("foo") {
489     ///     let entry = entry.unwrap();
490     ///     println!("{}", entry.path().display());
491     /// }
492     ///
493     /// // foo
494     /// // foo/abc
495     /// // foo/abc/qrs
496     /// // foo/abc/tuv
497     /// // foo/def
498     /// ```
499     ///
500     /// With contents_first enabled:
501     ///
502     /// ```no_run
503     /// use walkdir::WalkDir;
504     ///
505     /// for entry in WalkDir::new("foo").contents_first(true) {
506     ///     let entry = entry.unwrap();
507     ///     println!("{}", entry.path().display());
508     /// }
509     ///
510     /// // foo/abc/qrs
511     /// // foo/abc/tuv
512     /// // foo/abc
513     /// // foo/def
514     /// // foo
515     /// ```
contents_first(mut self, yes: bool) -> Self516     pub fn contents_first(mut self, yes: bool) -> Self {
517         self.opts.contents_first = yes;
518         self
519     }
520 
521     /// Do not cross file system boundaries.
522     ///
523     /// When this option is enabled, directory traversal will not descend into
524     /// directories that are on a different file system from the root path.
525     ///
526     /// Currently, this option is only supported on Unix and Windows. If this
527     /// option is used on an unsupported platform, then directory traversal
528     /// will immediately return an error and will not yield any entries.
same_file_system(mut self, yes: bool) -> Self529     pub fn same_file_system(mut self, yes: bool) -> Self {
530         self.opts.same_file_system = yes;
531         self
532     }
533 }
534 
535 impl IntoIterator for WalkDir {
536     type Item = Result<DirEntry>;
537     type IntoIter = IntoIter;
538 
into_iter(self) -> IntoIter539     fn into_iter(self) -> IntoIter {
540         IntoIter {
541             opts: self.opts,
542             start: Some(self.root),
543             stack_list: vec![],
544             stack_path: vec![],
545             oldest_opened: 0,
546             depth: 0,
547             deferred_dirs: vec![],
548             root_device: None,
549         }
550     }
551 }
552 
553 /// An iterator for recursively descending into a directory.
554 ///
555 /// A value with this type must be constructed with the [`WalkDir`] type, which
556 /// uses a builder pattern to set options such as min/max depth, max open file
557 /// descriptors and whether the iterator should follow symbolic links. After
558 /// constructing a `WalkDir`, call [`.into_iter()`] at the end of the chain.
559 ///
560 /// The order of elements yielded by this iterator is unspecified.
561 ///
562 /// [`WalkDir`]: struct.WalkDir.html
563 /// [`.into_iter()`]: struct.WalkDir.html#into_iter.v
564 #[derive(Debug)]
565 pub struct IntoIter {
566     /// Options specified in the builder. Depths, max fds, etc.
567     opts: WalkDirOptions,
568     /// The start path.
569     ///
570     /// This is only `Some(...)` at the beginning. After the first iteration,
571     /// this is always `None`.
572     start: Option<PathBuf>,
573     /// A stack of open (up to max fd) or closed handles to directories.
574     /// An open handle is a plain [`fs::ReadDir`] while a closed handle is
575     /// a `Vec<fs::DirEntry>` corresponding to the as-of-yet consumed entries.
576     ///
577     /// [`fs::ReadDir`]: https://doc.rust-lang.org/stable/std/fs/struct.ReadDir.html
578     stack_list: Vec<DirList>,
579     /// A stack of file paths.
580     ///
581     /// This is *only* used when [`follow_links`] is enabled. In all other
582     /// cases this stack is empty.
583     ///
584     /// [`follow_links`]: struct.WalkDir.html#method.follow_links
585     stack_path: Vec<Ancestor>,
586     /// An index into `stack_list` that points to the oldest open directory
587     /// handle. If the maximum fd limit is reached and a new directory needs to
588     /// be read, the handle at this index is closed before the new directory is
589     /// opened.
590     oldest_opened: usize,
591     /// The current depth of iteration (the length of the stack at the
592     /// beginning of each iteration).
593     depth: usize,
594     /// A list of DirEntries corresponding to directories, that are
595     /// yielded after their contents has been fully yielded. This is only
596     /// used when `contents_first` is enabled.
597     deferred_dirs: Vec<DirEntry>,
598     /// The device of the root file path when the first call to `next` was
599     /// made.
600     ///
601     /// If the `same_file_system` option isn't enabled, then this is always
602     /// `None`. Conversely, if it is enabled, this is always `Some(...)` after
603     /// handling the root path.
604     root_device: Option<u64>,
605 }
606 
607 /// An ancestor is an item in the directory tree traversed by walkdir, and is
608 /// used to check for loops in the tree when traversing symlinks.
609 #[derive(Debug)]
610 struct Ancestor {
611     /// The path of this ancestor.
612     path: PathBuf,
613     /// An open file to this ancesor. This is only used on Windows where
614     /// opening a file handle appears to be quite expensive, so we choose to
615     /// cache it. This comes at the cost of not respecting the file descriptor
616     /// limit set by the user.
617     #[cfg(windows)]
618     handle: Handle,
619 }
620 
621 impl Ancestor {
622     /// Create a new ancestor from the given directory path.
623     #[cfg(windows)]
new(dent: &DirEntry) -> io::Result<Ancestor>624     fn new(dent: &DirEntry) -> io::Result<Ancestor> {
625         let handle = Handle::from_path(dent.path())?;
626         Ok(Ancestor { path: dent.path().to_path_buf(), handle })
627     }
628 
629     /// Create a new ancestor from the given directory path.
630     #[cfg(not(windows))]
new(dent: &DirEntry) -> io::Result<Ancestor>631     fn new(dent: &DirEntry) -> io::Result<Ancestor> {
632         Ok(Ancestor { path: dent.path().to_path_buf() })
633     }
634 
635     /// Returns true if and only if the given open file handle corresponds to
636     /// the same directory as this ancestor.
637     #[cfg(windows)]
is_same(&self, child: &Handle) -> io::Result<bool>638     fn is_same(&self, child: &Handle) -> io::Result<bool> {
639         Ok(child == &self.handle)
640     }
641 
642     /// Returns true if and only if the given open file handle corresponds to
643     /// the same directory as this ancestor.
644     #[cfg(not(windows))]
is_same(&self, child: &Handle) -> io::Result<bool>645     fn is_same(&self, child: &Handle) -> io::Result<bool> {
646         Ok(child == &Handle::from_path(&self.path)?)
647     }
648 }
649 
650 /// A sequence of unconsumed directory entries.
651 ///
652 /// This represents the opened or closed state of a directory handle. When
653 /// open, future entries are read by iterating over the raw `fs::ReadDir`.
654 /// When closed, all future entries are read into memory. Iteration then
655 /// proceeds over a [`Vec<fs::DirEntry>`].
656 ///
657 /// [`fs::ReadDir`]: https://doc.rust-lang.org/stable/std/fs/struct.ReadDir.html
658 /// [`Vec<fs::DirEntry>`]: https://doc.rust-lang.org/stable/std/vec/struct.Vec.html
659 #[derive(Debug)]
660 enum DirList {
661     /// An opened handle.
662     ///
663     /// This includes the depth of the handle itself.
664     ///
665     /// If there was an error with the initial [`fs::read_dir`] call, then it
666     /// is stored here. (We use an [`Option<...>`] to make yielding the error
667     /// exactly once simpler.)
668     ///
669     /// [`fs::read_dir`]: https://doc.rust-lang.org/stable/std/fs/fn.read_dir.html
670     /// [`Option<...>`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html
671     Opened { depth: usize, it: result::Result<ReadDir, Option<Error>> },
672     /// A closed handle.
673     ///
674     /// All remaining directory entries are read into memory.
675     Closed(vec::IntoIter<Result<DirEntry>>),
676 }
677 
678 impl Iterator for IntoIter {
679     type Item = Result<DirEntry>;
680     /// Advances the iterator and returns the next value.
681     ///
682     /// # Errors
683     ///
684     /// If the iterator fails to retrieve the next value, this method returns
685     /// an error value. The error will be wrapped in an Option::Some.
next(&mut self) -> Option<Result<DirEntry>>686     fn next(&mut self) -> Option<Result<DirEntry>> {
687         if let Some(start) = self.start.take() {
688             if self.opts.same_file_system {
689                 let result = util::device_num(&start)
690                     .map_err(|e| Error::from_path(0, start.clone(), e));
691                 self.root_device = Some(itry!(result));
692             }
693             let dent = itry!(DirEntry::from_path(0, start, false));
694             if let Some(result) = self.handle_entry(dent) {
695                 return Some(result);
696             }
697         }
698         while !self.stack_list.is_empty() {
699             self.depth = self.stack_list.len();
700             if let Some(dentry) = self.get_deferred_dir() {
701                 return Some(Ok(dentry));
702             }
703             if self.depth > self.opts.max_depth {
704                 // If we've exceeded the max depth, pop the current dir
705                 // so that we don't descend.
706                 self.pop();
707                 continue;
708             }
709             // Unwrap is safe here because we've verified above that
710             // `self.stack_list` is not empty
711             let next = self
712                 .stack_list
713                 .last_mut()
714                 .expect("BUG: stack should be non-empty")
715                 .next();
716             match next {
717                 None => self.pop(),
718                 Some(Err(err)) => return Some(Err(err)),
719                 Some(Ok(dent)) => {
720                     if let Some(result) = self.handle_entry(dent) {
721                         return Some(result);
722                     }
723                 }
724             }
725         }
726         if self.opts.contents_first {
727             self.depth = self.stack_list.len();
728             if let Some(dentry) = self.get_deferred_dir() {
729                 return Some(Ok(dentry));
730             }
731         }
732         None
733     }
734 }
735 
736 impl IntoIter {
737     /// Skips the current directory.
738     ///
739     /// This causes the iterator to stop traversing the contents of the least
740     /// recently yielded directory. This means any remaining entries in that
741     /// directory will be skipped (including sub-directories).
742     ///
743     /// Note that the ergonomics of this method are questionable since it
744     /// borrows the iterator mutably. Namely, you must write out the looping
745     /// condition manually. For example, to skip hidden entries efficiently on
746     /// unix systems:
747     ///
748     /// ```no_run
749     /// use walkdir::{DirEntry, WalkDir};
750     ///
751     /// fn is_hidden(entry: &DirEntry) -> bool {
752     ///     entry.file_name()
753     ///          .to_str()
754     ///          .map(|s| s.starts_with("."))
755     ///          .unwrap_or(false)
756     /// }
757     ///
758     /// let mut it = WalkDir::new("foo").into_iter();
759     /// loop {
760     ///     let entry = match it.next() {
761     ///         None => break,
762     ///         Some(Err(err)) => panic!("ERROR: {}", err),
763     ///         Some(Ok(entry)) => entry,
764     ///     };
765     ///     if is_hidden(&entry) {
766     ///         if entry.file_type().is_dir() {
767     ///             it.skip_current_dir();
768     ///         }
769     ///         continue;
770     ///     }
771     ///     println!("{}", entry.path().display());
772     /// }
773     /// ```
774     ///
775     /// You may find it more convenient to use the [`filter_entry`] iterator
776     /// adapter. (See its documentation for the same example functionality as
777     /// above.)
778     ///
779     /// [`filter_entry`]: #method.filter_entry
skip_current_dir(&mut self)780     pub fn skip_current_dir(&mut self) {
781         if !self.stack_list.is_empty() {
782             self.pop();
783         }
784     }
785 
786     /// Yields only entries which satisfy the given predicate and skips
787     /// descending into directories that do not satisfy the given predicate.
788     ///
789     /// The predicate is applied to all entries. If the predicate is
790     /// true, iteration carries on as normal. If the predicate is false, the
791     /// entry is ignored and if it is a directory, it is not descended into.
792     ///
793     /// This is often more convenient to use than [`skip_current_dir`]. For
794     /// example, to skip hidden files and directories efficiently on unix
795     /// systems:
796     ///
797     /// ```no_run
798     /// use walkdir::{DirEntry, WalkDir};
799     /// # use walkdir::Error;
800     ///
801     /// fn is_hidden(entry: &DirEntry) -> bool {
802     ///     entry.file_name()
803     ///          .to_str()
804     ///          .map(|s| s.starts_with("."))
805     ///          .unwrap_or(false)
806     /// }
807     ///
808     /// # fn try_main() -> Result<(), Error> {
809     /// for entry in WalkDir::new("foo")
810     ///                      .into_iter()
811     ///                      .filter_entry(|e| !is_hidden(e)) {
812     ///     println!("{}", entry?.path().display());
813     /// }
814     /// # Ok(())
815     /// # }
816     /// ```
817     ///
818     /// Note that the iterator will still yield errors for reading entries that
819     /// may not satisfy the predicate.
820     ///
821     /// Note that entries skipped with [`min_depth`] and [`max_depth`] are not
822     /// passed to this predicate.
823     ///
824     /// Note that if the iterator has `contents_first` enabled, then this
825     /// method is no different than calling the standard `Iterator::filter`
826     /// method (because directory entries are yielded after they've been
827     /// descended into).
828     ///
829     /// [`skip_current_dir`]: #method.skip_current_dir
830     /// [`min_depth`]: struct.WalkDir.html#method.min_depth
831     /// [`max_depth`]: struct.WalkDir.html#method.max_depth
filter_entry<P>(self, predicate: P) -> FilterEntry<Self, P> where P: FnMut(&DirEntry) -> bool,832     pub fn filter_entry<P>(self, predicate: P) -> FilterEntry<Self, P>
833     where
834         P: FnMut(&DirEntry) -> bool,
835     {
836         FilterEntry { it: self, predicate }
837     }
838 
handle_entry( &mut self, mut dent: DirEntry, ) -> Option<Result<DirEntry>>839     fn handle_entry(
840         &mut self,
841         mut dent: DirEntry,
842     ) -> Option<Result<DirEntry>> {
843         if self.opts.follow_links && dent.file_type().is_symlink() {
844             dent = itry!(self.follow(dent));
845         }
846         let is_normal_dir = !dent.file_type().is_symlink() && dent.is_dir();
847         if is_normal_dir {
848             if self.opts.same_file_system && dent.depth() > 0 {
849                 if itry!(self.is_same_file_system(&dent)) {
850                     itry!(self.push(&dent));
851                 }
852             } else {
853                 itry!(self.push(&dent));
854             }
855         } else if dent.depth() == 0
856             && dent.file_type().is_symlink()
857             && self.opts.follow_root_links
858         {
859             // As a special case, if we are processing a root entry, then we
860             // always follow it even if it's a symlink and follow_links is
861             // false. We are careful to not let this change the semantics of
862             // the DirEntry however. Namely, the DirEntry should still respect
863             // the follow_links setting. When it's disabled, it should report
864             // itself as a symlink. When it's enabled, it should always report
865             // itself as the target.
866             let md = itry!(fs::metadata(dent.path()).map_err(|err| {
867                 Error::from_path(dent.depth(), dent.path().to_path_buf(), err)
868             }));
869             if md.file_type().is_dir() {
870                 itry!(self.push(&dent));
871             }
872         }
873         if is_normal_dir && self.opts.contents_first {
874             self.deferred_dirs.push(dent);
875             None
876         } else if self.skippable() {
877             None
878         } else {
879             Some(Ok(dent))
880         }
881     }
882 
get_deferred_dir(&mut self) -> Option<DirEntry>883     fn get_deferred_dir(&mut self) -> Option<DirEntry> {
884         if self.opts.contents_first {
885             if self.depth < self.deferred_dirs.len() {
886                 // Unwrap is safe here because we've guaranteed that
887                 // `self.deferred_dirs.len()` can never be less than 1
888                 let deferred: DirEntry = self
889                     .deferred_dirs
890                     .pop()
891                     .expect("BUG: deferred_dirs should be non-empty");
892                 if !self.skippable() {
893                     return Some(deferred);
894                 }
895             }
896         }
897         None
898     }
899 
push(&mut self, dent: &DirEntry) -> Result<()>900     fn push(&mut self, dent: &DirEntry) -> Result<()> {
901         // Make room for another open file descriptor if we've hit the max.
902         let free =
903             self.stack_list.len().checked_sub(self.oldest_opened).unwrap();
904         if free == self.opts.max_open {
905             self.stack_list[self.oldest_opened].close();
906         }
907         // Open a handle to reading the directory's entries.
908         let rd = fs::read_dir(dent.path()).map_err(|err| {
909             Some(Error::from_path(self.depth, dent.path().to_path_buf(), err))
910         });
911         let mut list = DirList::Opened { depth: self.depth, it: rd };
912         if let Some(ref mut cmp) = self.opts.sorter {
913             let mut entries: Vec<_> = list.collect();
914             entries.sort_by(|a, b| match (a, b) {
915                 (&Ok(ref a), &Ok(ref b)) => cmp(a, b),
916                 (&Err(_), &Err(_)) => Ordering::Equal,
917                 (&Ok(_), &Err(_)) => Ordering::Greater,
918                 (&Err(_), &Ok(_)) => Ordering::Less,
919             });
920             list = DirList::Closed(entries.into_iter());
921         }
922         if self.opts.follow_links {
923             let ancestor = Ancestor::new(&dent)
924                 .map_err(|err| Error::from_io(self.depth, err))?;
925             self.stack_path.push(ancestor);
926         }
927         // We push this after stack_path since creating the Ancestor can fail.
928         // If it fails, then we return the error and won't descend.
929         self.stack_list.push(list);
930         // If we had to close out a previous directory stream, then we need to
931         // increment our index the oldest still-open stream. We do this only
932         // after adding to our stack, in order to ensure that the oldest_opened
933         // index remains valid. The worst that can happen is that an already
934         // closed stream will be closed again, which is a no-op.
935         //
936         // We could move the close of the stream above into this if-body, but
937         // then we would have more than the maximum number of file descriptors
938         // open at a particular point in time.
939         if free == self.opts.max_open {
940             // Unwrap is safe here because self.oldest_opened is guaranteed to
941             // never be greater than `self.stack_list.len()`, which implies
942             // that the subtraction won't underflow and that adding 1 will
943             // never overflow.
944             self.oldest_opened = self.oldest_opened.checked_add(1).unwrap();
945         }
946         Ok(())
947     }
948 
pop(&mut self)949     fn pop(&mut self) {
950         self.stack_list.pop().expect("BUG: cannot pop from empty stack");
951         if self.opts.follow_links {
952             self.stack_path.pop().expect("BUG: list/path stacks out of sync");
953         }
954         // If everything in the stack is already closed, then there is
955         // room for at least one more open descriptor and it will
956         // always be at the top of the stack.
957         self.oldest_opened = min(self.oldest_opened, self.stack_list.len());
958     }
959 
follow(&self, mut dent: DirEntry) -> Result<DirEntry>960     fn follow(&self, mut dent: DirEntry) -> Result<DirEntry> {
961         dent =
962             DirEntry::from_path(self.depth, dent.path().to_path_buf(), true)?;
963         // The only way a symlink can cause a loop is if it points
964         // to a directory. Otherwise, it always points to a leaf
965         // and we can omit any loop checks.
966         if dent.is_dir() {
967             self.check_loop(dent.path())?;
968         }
969         Ok(dent)
970     }
971 
check_loop<P: AsRef<Path>>(&self, child: P) -> Result<()>972     fn check_loop<P: AsRef<Path>>(&self, child: P) -> Result<()> {
973         let hchild = Handle::from_path(&child)
974             .map_err(|err| Error::from_io(self.depth, err))?;
975         for ancestor in self.stack_path.iter().rev() {
976             let is_same = ancestor
977                 .is_same(&hchild)
978                 .map_err(|err| Error::from_io(self.depth, err))?;
979             if is_same {
980                 return Err(Error::from_loop(
981                     self.depth,
982                     &ancestor.path,
983                     child.as_ref(),
984                 ));
985             }
986         }
987         Ok(())
988     }
989 
is_same_file_system(&mut self, dent: &DirEntry) -> Result<bool>990     fn is_same_file_system(&mut self, dent: &DirEntry) -> Result<bool> {
991         let dent_device = util::device_num(dent.path())
992             .map_err(|err| Error::from_entry(dent, err))?;
993         Ok(self
994             .root_device
995             .map(|d| d == dent_device)
996             .expect("BUG: called is_same_file_system without root device"))
997     }
998 
skippable(&self) -> bool999     fn skippable(&self) -> bool {
1000         self.depth < self.opts.min_depth || self.depth > self.opts.max_depth
1001     }
1002 }
1003 
1004 impl DirList {
close(&mut self)1005     fn close(&mut self) {
1006         if let DirList::Opened { .. } = *self {
1007             *self = DirList::Closed(self.collect::<Vec<_>>().into_iter());
1008         }
1009     }
1010 }
1011 
1012 impl Iterator for DirList {
1013     type Item = Result<DirEntry>;
1014 
1015     #[inline(always)]
next(&mut self) -> Option<Result<DirEntry>>1016     fn next(&mut self) -> Option<Result<DirEntry>> {
1017         match *self {
1018             DirList::Closed(ref mut it) => it.next(),
1019             DirList::Opened { depth, ref mut it } => match *it {
1020                 Err(ref mut err) => err.take().map(Err),
1021                 Ok(ref mut rd) => rd.next().map(|r| match r {
1022                     Ok(r) => DirEntry::from_entry(depth + 1, &r),
1023                     Err(err) => Err(Error::from_io(depth + 1, err)),
1024                 }),
1025             },
1026         }
1027     }
1028 }
1029 
1030 /// A recursive directory iterator that skips entries.
1031 ///
1032 /// Values of this type are created by calling [`.filter_entry()`] on an
1033 /// `IntoIter`, which is formed by calling [`.into_iter()`] on a `WalkDir`.
1034 ///
1035 /// Directories that fail the predicate `P` are skipped. Namely, they are
1036 /// never yielded and never descended into.
1037 ///
1038 /// Entries that are skipped with the [`min_depth`] and [`max_depth`] options
1039 /// are not passed through this filter.
1040 ///
1041 /// If opening a handle to a directory resulted in an error, then it is yielded
1042 /// and no corresponding call to the predicate is made.
1043 ///
1044 /// Type parameter `I` refers to the underlying iterator and `P` refers to the
1045 /// predicate, which is usually `FnMut(&DirEntry) -> bool`.
1046 ///
1047 /// [`.filter_entry()`]: struct.IntoIter.html#method.filter_entry
1048 /// [`.into_iter()`]: struct.WalkDir.html#into_iter.v
1049 /// [`min_depth`]: struct.WalkDir.html#method.min_depth
1050 /// [`max_depth`]: struct.WalkDir.html#method.max_depth
1051 #[derive(Debug)]
1052 pub struct FilterEntry<I, P> {
1053     it: I,
1054     predicate: P,
1055 }
1056 
1057 impl<P> Iterator for FilterEntry<IntoIter, P>
1058 where
1059     P: FnMut(&DirEntry) -> bool,
1060 {
1061     type Item = Result<DirEntry>;
1062 
1063     /// Advances the iterator and returns the next value.
1064     ///
1065     /// # Errors
1066     ///
1067     /// If the iterator fails to retrieve the next value, this method returns
1068     /// an error value. The error will be wrapped in an `Option::Some`.
next(&mut self) -> Option<Result<DirEntry>>1069     fn next(&mut self) -> Option<Result<DirEntry>> {
1070         loop {
1071             let dent = match self.it.next() {
1072                 None => return None,
1073                 Some(result) => itry!(result),
1074             };
1075             if !(self.predicate)(&dent) {
1076                 if dent.is_dir() {
1077                     self.it.skip_current_dir();
1078                 }
1079                 continue;
1080             }
1081             return Some(Ok(dent));
1082         }
1083     }
1084 }
1085 
1086 impl<P> FilterEntry<IntoIter, P>
1087 where
1088     P: FnMut(&DirEntry) -> bool,
1089 {
1090     /// Yields only entries which satisfy the given predicate and skips
1091     /// descending into directories that do not satisfy the given predicate.
1092     ///
1093     /// The predicate is applied to all entries. If the predicate is
1094     /// true, iteration carries on as normal. If the predicate is false, the
1095     /// entry is ignored and if it is a directory, it is not descended into.
1096     ///
1097     /// This is often more convenient to use than [`skip_current_dir`]. For
1098     /// example, to skip hidden files and directories efficiently on unix
1099     /// systems:
1100     ///
1101     /// ```no_run
1102     /// use walkdir::{DirEntry, WalkDir};
1103     /// # use walkdir::Error;
1104     ///
1105     /// fn is_hidden(entry: &DirEntry) -> bool {
1106     ///     entry.file_name()
1107     ///          .to_str()
1108     ///          .map(|s| s.starts_with("."))
1109     ///          .unwrap_or(false)
1110     /// }
1111     ///
1112     /// # fn try_main() -> Result<(), Error> {
1113     /// for entry in WalkDir::new("foo")
1114     ///                      .into_iter()
1115     ///                      .filter_entry(|e| !is_hidden(e)) {
1116     ///     println!("{}", entry?.path().display());
1117     /// }
1118     /// # Ok(())
1119     /// # }
1120     /// ```
1121     ///
1122     /// Note that the iterator will still yield errors for reading entries that
1123     /// may not satisfy the predicate.
1124     ///
1125     /// Note that entries skipped with [`min_depth`] and [`max_depth`] are not
1126     /// passed to this predicate.
1127     ///
1128     /// Note that if the iterator has `contents_first` enabled, then this
1129     /// method is no different than calling the standard `Iterator::filter`
1130     /// method (because directory entries are yielded after they've been
1131     /// descended into).
1132     ///
1133     /// [`skip_current_dir`]: #method.skip_current_dir
1134     /// [`min_depth`]: struct.WalkDir.html#method.min_depth
1135     /// [`max_depth`]: struct.WalkDir.html#method.max_depth
filter_entry(self, predicate: P) -> FilterEntry<Self, P>1136     pub fn filter_entry(self, predicate: P) -> FilterEntry<Self, P> {
1137         FilterEntry { it: self, predicate }
1138     }
1139 
1140     /// Skips the current directory.
1141     ///
1142     /// This causes the iterator to stop traversing the contents of the least
1143     /// recently yielded directory. This means any remaining entries in that
1144     /// directory will be skipped (including sub-directories).
1145     ///
1146     /// Note that the ergonomics of this method are questionable since it
1147     /// borrows the iterator mutably. Namely, you must write out the looping
1148     /// condition manually. For example, to skip hidden entries efficiently on
1149     /// unix systems:
1150     ///
1151     /// ```no_run
1152     /// use walkdir::{DirEntry, WalkDir};
1153     ///
1154     /// fn is_hidden(entry: &DirEntry) -> bool {
1155     ///     entry.file_name()
1156     ///          .to_str()
1157     ///          .map(|s| s.starts_with("."))
1158     ///          .unwrap_or(false)
1159     /// }
1160     ///
1161     /// let mut it = WalkDir::new("foo").into_iter();
1162     /// loop {
1163     ///     let entry = match it.next() {
1164     ///         None => break,
1165     ///         Some(Err(err)) => panic!("ERROR: {}", err),
1166     ///         Some(Ok(entry)) => entry,
1167     ///     };
1168     ///     if is_hidden(&entry) {
1169     ///         if entry.file_type().is_dir() {
1170     ///             it.skip_current_dir();
1171     ///         }
1172     ///         continue;
1173     ///     }
1174     ///     println!("{}", entry.path().display());
1175     /// }
1176     /// ```
1177     ///
1178     /// You may find it more convenient to use the [`filter_entry`] iterator
1179     /// adapter. (See its documentation for the same example functionality as
1180     /// above.)
1181     ///
1182     /// [`filter_entry`]: #method.filter_entry
skip_current_dir(&mut self)1183     pub fn skip_current_dir(&mut self) {
1184         self.it.skip_current_dir();
1185     }
1186 }
1187