On Sunday, 5 October 2014 at 21:25:39 UTC, Ali Çehreli wrote:
import std.stdio, std.cstream, std.parallelism, std.datetime, std.range, core.atomic;

/**
 * Micro-benchmark: sums the integers in [0, iter) twice -- once with a
 * parallel foreach over numThreads equally sized chunks, once with a plain
 * single-threaded loop -- and prints both sums, both elapsed times in
 * microseconds, and the ratio serial/parallel time as a percentage.
 */
void main()
{
    StopWatch sw;
    shared ulong sum1 = 0; // parallel result; workers add to it with atomicOp
    ulong sum2 = 0;        // single-threaded reference result
    ulong time1;           // elapsed microseconds of the parallel run
    ulong time2;           // elapsed microseconds of the single-threaded run

    enum numThreads = 4; // If numThreads is a variable then it significantly slows down the process
    ulong iter = 1000000L;
    // Force iter to be a multiple of the number of threads so we can partition uniformly
    iter = numThreads*cast(ulong)(iter/numThreads);

    // The range yields the starting offset of each of the numThreads chunks.
    auto thds = parallel(iota(0, cast(uint)iter, cast(uint)(iter/numThreads)));

    // --- Parallel run ---
    sw.reset();
    sw.start();
    foreach(i; thds)
    {
        ulong s = 0;
        // NOTE(review): iter/numThreads is left inside the loop condition on
        // purpose; hoisting it would likely change the timing this benchmark
        // is meant to compare (enum vs. variable numThreads).
        for(ulong k = 0; k < iter/numThreads; k++)
        {
            s += k;
        }
        // Every element of this chunk is really (i + k), so add i once per
        // element: i * (chunk length).
        s += i*iter/numThreads;
        atomicOp!"+="(sum1, s);
    }
    sw.stop();
    time1 = sw.peek().usecs;

    // --- Single-threaded run ---
    sw.reset();
    sw.start();
    for (ulong i = 0; i < iter; ++i)
    {
        sum2 += i;
    }
    sw.stop();
    time2 = sw.peek().usecs;

    writefln("parallel sum : %s, elapsed %s us", sum1, time1);
    writefln("single thread sum : %s, elapsed %s us", sum2, time2);
    // Guard against division by zero when the parallel run is too fast to measure.
    if (time1 > 0) writefln("Efficiency : %s%%", 100*time2/time1);
    // Reads one character from stdin before exiting (presumably to keep the
    // console open -- confirm).
    din.getc();
}

Playing around with the code above, it seems that when numThreads is an enum, the execution time is significantly affected (going from < 100% efficiency to > 100% efficiency).

Results on a 4-core laptop with release builds (numThreads as an enum, as in the code above):

parallel sum : 499999500000, elapsed 2469 us
single thread sum : 499999500000, elapsed 8054 us
Efficiency : 326%


when numThreads is an int:

parallel sum : 499999500000, elapsed 21762 us
single thread sum : 499999500000, elapsed 8033 us
Efficiency : 36%

Reply via email to