Directory Traversal
| globset
| | |
Find files that have been modified in the last 24 hours
Gets the current working directory by calling std::env::current_dir
⮳ then, for each entry in std::fs::read_dir
⮳ extracts the
std::fs::DirEntry::path
⮳ and gets the metadata via std::fs::Metadata
⮳. The
std::fs::Metadata::modified
⮳ returns the last modification time, and std::time::SystemTime::elapsed
⮳ gives the time elapsed since that modification. std::time::Duration::as_secs
⮳ converts the time to seconds, which is then compared with 24 hours (24 * 60 * 60 seconds). std::fs::Metadata::is_file
⮳ filters out directories.
use std::env; use std::fs; use anyhow::Result; use anyhow::anyhow; fn main() -> Result<()> { let current_dir = env::current_dir()?; println!( "Entries modified in the last 24 hours in {:?}:", current_dir ); for entry in fs::read_dir(current_dir)? { let entry = entry?; let path = entry.path(); let metadata = fs::metadata(&path)?; if let Ok(time) = metadata.modified() { // Note: SystemTime.elapsed can be flaky. if let Ok(duration) = time.elapsed() { let last_modified = duration.as_secs(); if (last_modified < 24 * 3600) && metadata.is_file() { println!( "Last modified: {:?} seconds, is read only: {:?}, size: {:?} bytes, filename: {:?}", last_modified, metadata.permissions().readonly(), metadata.len(), path.file_name().ok_or(anyhow!("No filename"))? ); } } } else { println!("Last modification time not supported on this platform"); } } Ok(()) }
Find loops for a given path
same-file
is a simple crate for determining whether two file paths point to the same file.
Use same_file::is_same_file
⮳ to detect loops for a given path. For example, a loop could be created on a Unix system via symlinks:
mkdir -p /tmp/foo/bar/baz
ln -s /tmp/foo/ /tmp/foo/bar/baz/qux
The following would assert that a loop exists.
#![cfg(target_os = "linux")] use std::io; use std::path::Path; use std::path::PathBuf; use same_file::is_same_file; // Returns the two paths that form a loop, if found // Returns None otherwise // P: AsRef<Path> accepts PathBuf, Path... fn contains_loop<P: AsRef<Path>>( path: P, ) -> io::Result<Option<(PathBuf, PathBuf)>> { let path: &Path = path.as_ref(); // Copy into a mutable PathBuf let mut path_buf: PathBuf = path.to_path_buf(); // Truncate path_buf in succession: /stuff/much -> /stuff -> / while path_buf.pop() { if is_same_file(&path_buf, path)? { return Ok(Some((path_buf, path.to_path_buf()))); // Investigate the parent path against its own parents as well } else if let Some((looped_path1, looped_path2)) = contains_loop(&path_buf)? { return Ok(Some((looped_path1, looped_path2))); } } Ok(None) } fn main() { // `is_same_file` returns true if the two file paths may correspond to the // same file. assert!(is_same_file("/tmp/foo", "/tmp/./foo").unwrap_or(false)); assert_eq!( contains_loop("/tmp/foo/bar/baz/qux/bar/baz").unwrap(), Some(( PathBuf::from("/tmp/foo"), PathBuf::from("/tmp/foo/bar/baz/qux") )) ); println!("Loop found."); }
Recursively find duplicate file names
Recursively find duplicate filenames in the current directory, printing each one only once.
use std::collections::HashMap;

use walkdir::WalkDir;

/// Walks the current directory recursively and prints every file name that
/// occurs more than once, each exactly one time.
fn main() {
    // Maps a file name to the number of times it has been seen so far.
    let mut occurrences: HashMap<String, i32> = HashMap::new();

    let non_directories = WalkDir::new(".")
        .into_iter()
        .filter_map(|walked| walked.ok())
        .filter(|walked| !walked.file_type().is_dir());

    for walked in non_directories {
        let name = walked.file_name().to_string_lossy().into_owned();
        let seen = occurrences.entry(name.clone()).or_default();
        *seen += 1;

        // Report on the second sighting only, so a name repeated many times
        // is still printed once.
        if *seen == 2 {
            println!("{}", name);
        }
    }
}
Recursively find all files with a given predicate
Find files modified within the last day in the current directory. Using walkdir::WalkDir::follow_links
⮳ ensures symbolic links are followed like they were normal directories and files.
use anyhow::Result; use walkdir::WalkDir; fn main() -> Result<()> { for entry in WalkDir::new(".") .follow_links(true) .into_iter() .filter_map(|e| e.ok()) { let f_name = entry.file_name().to_string_lossy(); // `metadata()` can return errors for path values that the program // does not have permissions to access or if the path no longer exists. if let Ok(metadata) = entry.metadata() { let sec = metadata.modified()?; if let Ok(elapsed) = sec.elapsed() { if elapsed.as_secs() < 86400 { println!("{}", f_name); } } } // You may also check for specific extensions: // && f_name.ends_with(".json") } Ok(()) }
Traverse directories while skipping dotfiles
Uses walkdir::IntoIter::filter_entry
⮳ to descend recursively into entries passing the is_not_hidden
predicate thus skipping hidden files and directories. std::iter::Iterator::filter
⮳, by contrast, applies to each walkdir::DirEntry
⮳ even if the parent is a hidden directory.
Root dir "."
is still yielded thanks to the walkdir::DirEntry::depth
check in the is_not_hidden
predicate.
use walkdir::DirEntry;
use walkdir::WalkDir;

/// Returns `true` for the root entry (depth 0) and for any entry whose file
/// name does not start with a dot.
///
/// Entries whose file name is not valid UTF-8 are treated as hidden.
fn is_not_hidden(entry: &DirEntry) -> bool {
    match entry.file_name().to_str() {
        // The depth check keeps the root "." itself from being rejected.
        Some(name) => entry.depth() == 0 || !name.starts_with('.'),
        None => false,
    }
}

/// Prints every path below the current directory, without descending into
/// hidden directories and without listing hidden files.
fn main() {
    let visible_entries = WalkDir::new(".")
        .into_iter()
        .filter_entry(is_not_hidden);

    // Entries that could not be read are dropped.
    for entry in visible_entries.filter_map(|walked| walked.ok()) {
        println!("{}", entry.path().display());
    }
}
Recursively calculate file sizes at a given depth
Recursion depth can be flexibly set by walkdir::WalkDir::min_depth
⮳ & walkdir::WalkDir::max_depth
⮳ methods. Calculates the sum of all file sizes down to a depth of 3, skipping only the root entry itself (depth 0); files directly in the root folder are at depth 1 and are therefore counted.
use walkdir::WalkDir;

/// Sums the sizes of all regular files found at depths 1 through 3 below the
/// current directory and prints the total in bytes.
fn main() {
    let walker = WalkDir::new(".").min_depth(1).max_depth(3);

    let mut total_size: u64 = 0;
    // Unreadable entries and entries without metadata are ignored.
    for entry in walker.into_iter().filter_map(|walked| walked.ok()) {
        if let Ok(metadata) = entry.metadata() {
            if metadata.is_file() {
                total_size += metadata.len();
            }
        }
    }

    println!("Total size: {} bytes.", total_size);
}
Find all files with a given extension recursively
Recursively find all PNG files in the current directory. In this case, the **
pattern matches the current directory and all subdirectories.
Use the **
pattern in any path portion. For example, /media/**/*.png
matches all PNGs in media
and its subdirectories.
use anyhow::Result;
use glob::glob;

/// Prints the path of every PNG file in the current directory and all of its
/// subdirectories.
///
/// # Errors
///
/// Fails if the glob pattern is rejected or if a matched path cannot be read.
fn main() -> Result<()> {
    let png_matches = glob("**/*.png")?;

    for candidate in png_matches {
        let png_path = candidate?;
        println!("{}", png_path.display());
    }

    Ok(())
}
Find all files with given pattern, ignoring filename case
Find all image files in the /media/
directory matching the img_[0-9]*.png
pattern.
A custom glob::MatchOptions
⮳ struct is passed to the glob::glob_with
⮳ function, making the glob pattern case-insensitive while the other options keep their std::default::Default
⮳ values.
use anyhow::Result;
use glob::MatchOptions;
use glob::glob_with;

/// Prints every path in `/media/` that matches `img_[0-9]*.png`, ignoring
/// letter case.
///
/// # Errors
///
/// Fails if the glob pattern is rejected or if a matched path cannot be read.
fn main() -> Result<()> {
    // Only case sensitivity is overridden; every other option keeps its
    // default value.
    let case_insensitive = MatchOptions {
        case_sensitive: false,
        ..MatchOptions::default()
    };

    let image_matches = glob_with("/media/img_[0-9]*.png", case_insensitive)?;
    for candidate in image_matches {
        let image_path = candidate?;
        println!("{}", image_path.display());
    }

    Ok(())
}
globset
globset
allows multiple globs to be evaluated at once. Glob set matching is the process of matching one or more glob patterns against a single candidate path simultaneously, and returning all of the globs that matched.