On Wednesday, 18 May 2016 at 19:55:40 UTC, Andrei Alexandrescu wrote:
On 5/18/16 3:14 PM, Basile B. wrote:
Hey, los douchbagos...

I've recently worked on a suffix array
(https://github.com/BBasile/iz/blob/master/import/iz/strings.d#L1555)

Test for presence are excelent (O1)

canFind    % of canFind   : 100
SuffixArr1 % of canFind   : 2.78019
SuffixArr2 % of canFind   : 3.15439
runtime AA % of runtime AA: 100
EMSI hashs % of runtime AA: 167.434
SuffixArr1 % of runtime AA: 97.8582
SuffixArr2 % of runtime AA: 111.029
EMSI hashs % of EMSI hashs: 100
SuffixArr1 % of EMSI hashs: 58.4458
SuffixArr2 % of EMSI hashs: 66.3123

Could you explain the numbers a bit? -- Andrei

the benchmark is:
----
#!runnable-flags: -O -release -inline -boundscheck=off
module runnable;

import std.stdio;
import containers.hashset, containers.internal.hash;
import std.experimental.allocator.mallocator;

void main(string[] args)
{
    import std.datetime, iz.strings, std.random, std.range;
    import core.memory: GC;
    GC.disable;

    string[] wordsA, wordsB;
    ubyte[] source = cast(ubyte[])"ABCDEFGHIJKL".dup;
    foreach(i; 0..64)
    {
        wordsA ~= cast(string) randomCover(source, rndGen).array;
        rndGen.popFront;
        if ((i & 1) == 0)
            wordsB ~= wordsA[i];
        else
        {
wordsB ~= cast(string) randomCover(source, rndGen).array;
            rndGen.popFront;
        }
    }

    auto arrOld = SuffixArray!string(wordsA);
    auto arrNew = SuffixArrayTemp!string(wordsA);

    enum repeat = 2^^10;

    size_t test(T)(ref T t)
    {
        StopWatch sw;
        sw.start;
        foreach(i; 0..64)
            foreach(j; 0..repeat)
        {
            auto n = wordsB[i] in t;
        }
        return sw.peek.hnsecs;
    }

    size_t refTest()
    {
        import std.algorithm.searching: canFind;
        import std.algorithm.sorting;
        StopWatch sw;
        auto sorted = sort(wordsA).array;
        sw.start;
        foreach(i; 0..64)
            foreach(j; 0..repeat)
        {
            auto n = sorted.canFind(wordsB[i]);
        }
        return sw.peek.hnsecs;
    }

    size_t aaTest()
    {
        StopWatch sw;
        bool[string] aa;
        foreach(w; wordsA)
            aa[w] = true;
        sw.start;
        foreach(i; 0..64)
            foreach(j; 0..repeat)
        {
            auto n = wordsB[i] in aa;
        }
        return sw.peek.hnsecs;
    }

    size_t aa2Test()
    {
        static size_t hasher(string value) pure
        {
            size_t result;
            foreach(i; 1 .. value.length)
                result += value[i-1] ^ value[i] << (i-1);
            return result;
        }
        StopWatch sw;
alias HS = HashSet!(string, Mallocator, generateHash!string, false);
        HS aa;
        foreach(w; wordsA)
            aa.insert(w);
        sw.start;
        foreach(i; 0..64)
            foreach(j; 0..repeat)
        {
            auto n = wordsB[i] in aa;
        }
        return sw.peek.hnsecs;
    }


    auto tA = refTest();
    auto tB = aaTest();
    auto tC = aa2Test();
    auto t1 = test(arrOld);
    auto t2 = test(arrNew);

    writeln("canFind    % of canFind   : ", tA / (tA * 0.01));
    writeln("SuffixArr1 % of canFind   : ", t1 / (tA * 0.01));
    writeln("SuffixArr2 % of canFind   : ", t2 / (tA * 0.01));

    writeln("runtime AA % of runtime AA: ", tB / (tB * 0.01));
    writeln("EMSI hashs % of runtime AA: ", tC / (tB * 0.01));
    writeln("SuffixArr1 % of runtime AA: ", t1 / (tB * 0.01));
    writeln("SuffixArr2 % of runtime AA: ", t2 / (tB * 0.01));

    writeln("EMSI hashs % of EMSI hashs: ", tC / (tC * 0.01));
    writeln("SuffixArr1 % of EMSI hashs: ", t1 / (tC * 0.01));
    writeln("SuffixArr2 % of EMSI hashs: ", t2 / (tC * 0.01));
}
----

Reply via email to