On 11/9/22 12:06, Ali Çehreli wrote:

> I am using its sibling 'ftw'

Now that we know that dirEntries works properly, I decided not to use ftw.

However, ftw runs about twice as fast as dirEntries (even though the two implementations below share some code). I am leaving it here in case somebody finds it useful. (Why don't I put it on GitHub, then? OK, some day I will.)

import core.sys.posix.sys.stat;
import std.algorithm;
import std.exception;
import std.file;
import std.path;
import std.range;
import std.string;

/**
 * Binding for the Posix "file tree walker" function.
 *
 * Params:
 *     dirpath = zero-terminated path of the directory where the walk starts
 *     fn      = callback invoked for each visited entry with its path, its
 *               stat information, and a type flag (see TypeFlag)
 *     nopenfd = limit on the number of file descriptors used by the walk
 *               (see 'man nftw')
 *
 * Returns: an int status; callers in this file treat 0 as success.
 */
extern (C) int ftw(
    const char* dirpath,
    int function (const char* fpath, const stat_t* sb, int typeflag) fn,
    int nopenfd);

/// The subset of ftw() type flags that this program distinguishes.
/// See 'man nftw' or /usr/include/ftw.h for the other values.
enum TypeFlag : int {
    /// The visited entry is a regular file.
    FTW_F = 0,

    /// The visited entry is a directory.
    FTW_D = 1,
}

/// One non-directory entry recorded by a directory walk.
struct DirectoryEntry {
    /// Path of the entry, as reported by the walker that produced it.
    string name;

    /// Size of the entry, as reported by the walker that produced it.
    ulong size;
}

/// The outcome of walking a directory tree.
struct WalkResult {
    /// Every non-directory entry that the walk recorded.
    DirectoryEntry[] entries;

    /// Directories that are not the parent of any recorded entry or
    /// directory.
    string[] emptyDirs;
}

/**
 * Walks the directory tree rooted at `root` by using the Posix ftw()
 * function.
 *
 * Params:
 *     root = the directory at which the walk starts
 *
 * Returns: a WalkResult holding one DirectoryEntry (path and st_size) for
 *     each regular file reported by ftw(), and the reported directories that
 *     are not the parent of any reported file or directory.
 *
 * Throws: Exception (through enforce) when ftw() returns non-zero.
 */
WalkResult directoryWalk_ftw(string root) {
    WalkResult impl_() {
        // These have to be 'static' because ftw() does not allow us to pass a
        // context. And that's why this function must only be called from a
        // synchronized block.
        // NOTE(review): non-shared statics are thread-local in D, so the
        // synchronized block may be stricter than necessary; it is kept to
        // preserve the original behavior.
        static DirectoryEntry[] entries;
        static string[] dirs;

        // Reuse the buffers left over from a previous call.
        entries.length = 0;
        entries.assumeSafeAppend();

        dirs.length = 0;
        dirs.assumeSafeAppend();

        // This is the callback that ftw() uses.
        extern (C)
        int handler(const char *fpath, const stat_t *sb, int typeflag) {
            // Copy the path because the C string belongs to ftw().
            const path = fpath.fromStringz.idup;

            switch (typeflag) {
            case TypeFlag.FTW_F:
                entries ~= DirectoryEntry(path, sb.st_size);
                break;

            case TypeFlag.FTW_D:
                dirs ~= path;
                break;

            default:
                import std.stdio;
                // The arguments follow the order of the format specifiers:
                // the type flag first, then the path.
                writefln!"Ignoring type %s file: %s\n(See 'man nftw')"(
                    typeflag, path);
                break;
            }

            // Zero tells ftw() to continue the walk.
            return 0;
        }

        // The tree walk will be faster up-to this "search depth"
        // (See 'man nftw')
        enum nopenfd = 32;

        const ret = ftw(root.toStringz, &handler, nopenfd);
        enforce(ret == 0,
                format!"Failed walking the directory tree at %s; error: %s"(
                    root, ret));

        // A directory is non-empty if it is the parent of any reported file
        // or directory.
        string[] nonEmptyDirs = chain(entries.map!(e => e.name),
                                      dirs)
                                .map!dirName
                                .array
                                .sort
                                .uniq
                                .array;

        // setDifference requires both of its inputs to be sorted.
        sort(dirs);

        string[] emptyDirs = setDifference(dirs, nonEmptyDirs)
                             .array;

        // Return a copy because the static buffer is overwritten by the next
        // call.
        return WalkResult(entries.dup, emptyDirs);
    }

    synchronized {
        return impl_();
    }
}

/**
 * Walks the directory tree rooted at `root` by using std.file.dirEntries.
 *
 * Params:
 *     root = the directory at which the walk starts
 *
 * Returns: a WalkResult holding one DirectoryEntry (path and size) for each
 *     non-directory entry, and the visited directories that are not the
 *     parent of any visited entry.
 */
WalkResult directoryWalk_dirEntries(string root) {
    DirectoryEntry[] entries;
    string[] dirs;

    foreach (entry; dirEntries(root, SpanMode.depth)) {
        if (entry.isDir) {
            dirs ~= entry.name;

        } else {
            // DirEntry.size is used instead of getSize(entry.name) so that
            // the file is not looked up by name a second time.
            entries ~= DirectoryEntry(entry.name, entry.size);
        }
    }

    // A directory is non-empty if it is the parent of any visited entry.
    string[] nonEmptyDirs = chain(entries.map!(e => e.name),
                                  dirs)
                            .map!dirName
                            .array
                            .sort
                            .uniq
                            .array;

    // setDifference requires both of its inputs to be sorted.
    sort(dirs);

    string[] emptyDirs = setDifference(dirs, nonEmptyDirs)
                         .array;

    // 'entries' is a local array, so it is returned without a copy.
    return WalkResult(entries, emptyDirs);
}

/**
 * Benchmarks directoryWalk_ftw against directoryWalk_dirEntries on the
 * directory given as the only command line argument and prints both timings.
 *
 * Params:
 *     args = the program name followed by the directory to walk
 *
 * Returns: 0 on success; 1 when the directory argument is missing or extra
 *     arguments are given.
 */
int main(string[] args) {
    import std.datetime.stopwatch;
    import std.stdio;
    import std.path;

    if (args.length != 2) {
        stderr.writefln!"Please provide the directory to walk:\n\n %s <directory>\n"
            (args[0].baseName);
        return 1;
    }

    // Walk the directory that the user asked for.
    const dir = buildNormalizedPath(args[1]);

    // Run each implementation 10 times.
    auto timings = benchmark!({ directoryWalk_ftw(dir); },
                              { directoryWalk_dirEntries(dir); })(10);

    writefln!("ftw       : %s\n" ~
              "dirEntries: %s")(timings[0], timings[1]);

    return 0;
}

Ali

Reply via email to