I wrote a "count newlines" program based on memory-mapped files. It used about twice the CPU of the version which just reads 1 MB at a time. I thought something was amiss (needless slice indirection or something), so I rewrote the code in C. It had the same CPU usage as the D version. So... mapped files, not so much. Not D's fault. And writing it in C made me realize how much easier it is to code in D!

The D version:

import std.stdio : writeln;
import std.mmfile : MmFile;

// NOTE(review): not referenced by any code visible here; presumably left over
// from a chunked-read version of this program -- confirm before removing.
const uint CHUNKSZ = 65536;

/**
 * Counts the newline characters in a buffer.
 *
 * Params:
 *   data = the characters to scan
 *
 * Returns: the number of elements of `data` equal to '\n'.
 */
size_t
countnl(ref shared char[] data)
{
    size_t newlines = 0;

    for (size_t i = 0; i < data.length; ++i) {
        if (data[i] == '\n') {
            ++newlines;
        }
    }
    return newlines;
}

/**
 * Prints a usage message to stderr and terminates the process with exit
 * status 1.
 *
 * Params:
 *   progname = program name shown in the message
 */
void
usage(in string progname)
{
    import core.stdc.stdlib : exit;
    import std.stdio : stderr;

    // writeln concatenates its arguments and performs no format expansion,
    // so a "%s" placeholder in the literal would be printed verbatim.
    stderr.writeln("Usage is: ", progname, " <file> ...");
    exit(1);
}

public:
/**
 * Entry point. Prints the usage message (which exits) when no file
 * arguments are given; otherwise maps each named file and prints
 * "<name>: <newline count>" for it.
 */
void
main(string[] argv)
{
    if (argv.length < 2) {
        usage(argv[0]);
    }
    foreach (path; argv[1 .. $]) {
        auto mapped = new MmFile(path);
        // countnl takes its argument by ref, so the slice needs a named lvalue.
        auto contents = cast(shared char[])(mapped[]);
        const size_t newlines = countnl(contents);
        writeln(path, ": ", newlines);
    }
}

And the C one (no performance gain over D):

#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>

/*
 * Count the '\n' bytes in the file open on fd by mapping it read-only.
 *
 * fd: descriptor open for reading; it is not closed here.
 * nm: name used as the prefix of any error message.
 *
 * Returns the number of newlines. Returns 0 for an empty file, and also
 * after reporting an error on stderr (the caller cannot tell these apart).
 */
static unsigned long
countnl(int fd, char *nm)
{
    struct stat st;
    const char *buf, *p, *end;
    size_t len;
    unsigned long res = 0;

    if (fstat(fd, &st) < 0) {
        perror(nm);
        return 0;
    }

    /* mmap() rejects a zero length, and an empty file has no newlines. */
    if (st.st_size <= 0) {
        return 0;
    }

    /*
     * Keep the full file size: an unsigned int would silently truncate
     * files of 4 GiB or more. Refuse sizes that do not fit in size_t.
     */
    len = (size_t)st.st_size;
    if ((off_t)len != st.st_size) {
        fprintf(stderr, "%s: file too large to map\n", nm);
        return 0;
    }

    buf = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
    if (buf == MAP_FAILED) {
        perror(nm);
        return 0;
    }

    end = buf + len;
    for (p = buf; p < end; ++p) {
        if (*p == '\n') {
            res += 1;
        }
    }

    /* Unmap exactly the length that was mapped. */
    munmap((void *)buf, len);
    return res;
}

/*
 * For every path given on the command line, open the file, count its
 * newlines with countnl(), and print "<path>: <count>" on stdout.
 * A file that cannot be opened is reported on stderr and skipped.
 */
int
main(int argc, char **argv)
{
    int x;

    for (x = 1; x < argc; ++x) {
        unsigned long res;
        char *nm = argv[x];

        int fd = open(nm, O_RDONLY);
        if (fd < 0) {
            perror(nm);
            continue;
        }
        res = countnl(fd, nm);
        close(fd);
        /*
         * res is unsigned long, so the conversion must be %lu; "%uld"
         * mismatches the argument type and prints a stray "ld".
         */
        printf("%s: %lu\n", nm, res);
    }
    return 0;
}

Reply via email to