Directory Traversal

Find files that have been modified in the last 24 hours

std cat-filesystem

Gets the current working directory by calling std::env::current_dir⮳, then, for each entry in std::fs::read_dir⮳, extracts the std::fs::DirEntry::path⮳ and gets the metadata via std::fs::Metadata⮳. std::fs::Metadata::modified⮳ returns the last modification time, and std::time::SystemTime::elapsed⮳ gives the time elapsed since that modification. std::time::Duration::as_secs⮳ converts the elapsed time to seconds, which is then compared with 24 hours (24 * 60 * 60 seconds). std::fs::Metadata::is_file⮳ filters out directories.

use std::env;
use std::fs;

use anyhow::anyhow;
use anyhow::Result;

/// Lists the regular files in the current working directory that were
/// modified within the last 24 hours, printing age, read-only flag, size and
/// file name for each.
///
/// # Errors
///
/// Returns an error if the current directory cannot be determined or read,
/// if an entry or its metadata cannot be accessed, or if a matching path has
/// no final file-name component.
fn main() -> Result<()> {
    /// Age threshold: 24 hours expressed in seconds.
    const DAY_IN_SECS: u64 = 24 * 3600;

    let current_dir = env::current_dir()?;
    println!(
        "Entries modified in the last 24 hours in {:?}:",
        current_dir
    );

    for entry in fs::read_dir(current_dir)? {
        let path = entry?.path();
        // `fs::metadata` follows symbolic links, so the values below describe
        // the link target rather than the link itself.
        let metadata = fs::metadata(&path)?;

        let time = match metadata.modified() {
            Ok(time) => time,
            Err(_) => {
                println!("Last modification time not supported on this platform");
                continue;
            }
        };

        // `SystemTime::elapsed` fails when the timestamp lies in the future
        // (e.g. after a clock adjustment); such entries are silently skipped.
        let last_modified = match time.elapsed() {
            Ok(duration) => duration.as_secs(),
            Err(_) => continue,
        };

        if last_modified < DAY_IN_SECS && metadata.is_file() {
            // Build the error lazily so nothing is constructed on the
            // (normal) path where the file name exists.
            let file_name = path
                .file_name()
                .ok_or_else(|| anyhow!("No filename"))?;
            println!(
                "Last modified: {:?} seconds, is read only: {:?}, size: {:?} bytes, filename: {:?}",
                last_modified,
                metadata.permissions().readonly(),
                metadata.len(),
                file_name
            );
        }
    }
    Ok(())
}

Find loops for a given path

same-file same-file-crates.io same-file-github same-file-lib.rs cat-filesystem

Use same_file::is_same_file⮳ to detect loops for a given path. For example, a loop could be created on a Unix system via symlinks:

mkdir -p /tmp/foo/bar/baz
ln -s /tmp/foo/ /tmp/foo/bar/baz/qux

The following would assert that a loop exists.

use std::io;
use std::path::Path;
use std::path::PathBuf;

use same_file::is_same_file;

fn contains_loop<P: AsRef<Path>>(
    path: P,
) -> io::Result<Option<(PathBuf, PathBuf)>> {
    let path = path.as_ref();
    let mut path_buf = path.to_path_buf();
    while path_buf.pop() {
        if is_same_file(&path_buf, path)? {
            return Ok(Some((path_buf, path.to_path_buf())));
        } else if let Some(looped_paths) = contains_loop(&path_buf)? {
            return Ok(Some(looped_paths));
        }
    }
    Ok(None)
}

/// Checks that the symlink loop described above is detected: the path
/// `/tmp/foo/bar/baz/qux` is expected to refer to the same file as `/tmp/foo`.
///
/// Panics if the check fails or if `contains_loop` returns an error.
fn main() {
    let found = contains_loop("/tmp/foo/bar/baz/qux/bar/baz").unwrap();

    let looping_ancestor = PathBuf::from("/tmp/foo");
    let looping_descendant = PathBuf::from("/tmp/foo/bar/baz/qux");

    assert_eq!(found, Some((looping_ancestor, looping_descendant)));
}

Recursively find duplicate file names

cat-filesystem

walkdir walkdir-crates.io walkdir-github walkdir-lib.rs

Recursively find duplicate filenames in the current directory, printing each of them only once.

use std::collections::HashMap;

use walkdir::WalkDir;

/// Walks the current directory recursively and prints every non-directory
/// file name that occurs more than once.
///
/// A name is printed exactly when its second occurrence is seen, so each
/// duplicate appears only once in the output. Entries that cannot be read
/// are skipped.
fn main() {
    // Maps a file name to the number of times it has been seen so far.
    let mut occurrences = HashMap::new();

    let non_directories = WalkDir::new(".")
        .into_iter()
        .filter_map(Result::ok)
        .filter(|e| !e.file_type().is_dir());

    for file in non_directories {
        let name = file.file_name().to_string_lossy().into_owned();
        let seen = occurrences.entry(name.clone()).or_insert(0);
        *seen += 1;

        // Report on the second sighting only; later sightings stay quiet.
        if *seen == 2 {
            println!("{}", name);
        }
    }
}

Recursively find all files with given predicate

walkdir cat-filesystem

Find files modified within the last day in the current directory. Using walkdir::WalkDir::follow_links⮳ ensures symbolic links are followed as if they were normal directories and files.

use anyhow::Result;
use walkdir::WalkDir;

/// Walks the current directory, following symbolic links, and prints the
/// file name of every entry modified less than 86400 seconds (one day) ago.
///
/// Entries that cannot be read, or whose metadata is unavailable, are
/// skipped.
///
/// # Errors
///
/// Returns an error if the modification time of an entry cannot be obtained
/// from its metadata.
fn main() -> Result<()> {
    let walker = WalkDir::new(".").follow_links(true).into_iter();

    for entry in walker.filter_map(|e| e.ok()) {
        let f_name = entry.file_name().to_string_lossy();

        // `metadata()` can fail for paths the program is not permitted to
        // access or that no longer exist; such entries are ignored.
        let metadata = match entry.metadata() {
            Ok(metadata) => metadata,
            Err(_) => continue,
        };

        let modified_at = metadata.modified()?;
        // A modification time in the future makes `elapsed()` fail; treat
        // that the same as "not recent".
        let is_recent = match modified_at.elapsed() {
            Ok(age) => age.as_secs() < 86400,
            Err(_) => false,
        };

        // You may also check for specific extensions:
        // && f_name.ends_with(".json")
        if is_recent {
            println!("{}", f_name);
        }
    }

    Ok(())
}

Traverse directories while skipping dotfiles

walkdir cat-filesystem

Uses walkdir::IntoIter::filter_entry⮳ to descend recursively into entries passing the is_not_hidden predicate, thus skipping hidden files and directories. By contrast, std::iter::Iterator::filter⮳ would be applied to each walkdir::DirEntry⮳ even if its parent is a hidden directory.

The root dir "." is still yielded because the is_not_hidden predicate checks walkdir::DirEntry::depth⮳ and accepts any entry at depth 0.

use walkdir::DirEntry;
use walkdir::WalkDir;

/// Returns `true` when `entry` should be visited.
///
/// An entry is accepted if it sits at depth 0 (the walk root, so that "."
/// itself is not rejected) or if its file name does not start with a dot.
/// Entries whose file name is not valid UTF-8 are rejected.
fn is_not_hidden(entry: &DirEntry) -> bool {
    match entry.file_name().to_str() {
        Some(name) => entry.depth() == 0 || !name.starts_with('.'),
        // The name cannot be inspected as text, so treat it as hidden.
        None => false,
    }
}

/// Walks the current directory and prints the path of every entry accepted
/// by `is_not_hidden`; rejected directories are not descended into.
/// Entries that cannot be read are skipped.
fn main() {
    let visible = WalkDir::new(".")
        .into_iter()
        .filter_entry(is_not_hidden);

    for entry in visible.filter_map(|v| v.ok()) {
        println!("{}", entry.path().display());
    }
}

Recursively calculate file sizes at given depth

walkdir cat-filesystem

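Recursion depth can be flexibly set by the walkdir::WalkDir::min_depth⮳ & walkdir::WalkDir::max_depth⮳ methods. Calculates the sum of all file sizes from depth 1 down to a depth of 3 subfolders. Note that min_depth(1) only excludes the root entry "." itself (depth 0); files located directly in the root folder are at depth 1 and are counted. Use min_depth(2) to ignore files in the root folder.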

use walkdir::WalkDir;

/// Adds up the sizes of all regular files found between depth 1 and depth 3
/// below the current directory and prints the total in bytes.
///
/// Entries that cannot be read, or whose metadata is unavailable, are
/// skipped.
fn main() {
    let mut total_size = 0;

    let entries = WalkDir::new(".")
        .min_depth(1)
        .max_depth(3)
        .into_iter()
        .filter_map(|entry| entry.ok());

    for entry in entries {
        if let Ok(metadata) = entry.metadata() {
            // Only regular files contribute to the total.
            if metadata.is_file() {
                total_size += metadata.len();
            }
        }
    }

    println!("Total size: {} bytes.", total_size);
}

Find all png files recursively

glob cat-filesystem

Recursively find all PNG files in the current directory. In this case, the ** pattern matches the current directory and all subdirectories.

Use the ** pattern in any path portion. For example, /media/**/*.png matches all PNGs in media and its subdirectories.

use anyhow::Result;
use glob::glob;

/// Prints the path of every file matching `**/*.png`, i.e. every PNG file in
/// the current directory and its subdirectories.
///
/// # Errors
///
/// Returns an error if the pattern is invalid or if a matched path cannot be
/// read.
fn main() -> Result<()> {
    let matches = glob("**/*.png")?;

    for found in matches {
        let path = found?;
        println!("{}", path.display());
    }

    Ok(())
}

Find all files with given pattern ignoring filename case

glob cat-filesystem

Find all image files in the /media/ directory matching the img_[0-9]*.png pattern.

A custom glob::MatchOptions⮳ struct is passed to the glob::glob_with⮳ function, making the glob pattern case insensitive while keeping the other options at their std::default::Default⮳ values.

use anyhow::Result;
use glob::glob_with;
use glob::MatchOptions;

fn main() -> Result<()> {
    let options = MatchOptions {
        case_sensitive: false,
        ..Default::default()
    };

    for entry in glob_with("/media/img_[0-9]*.png", options)? {
        println!("{}", entry?.display());
    }

    Ok(())
}