On Sunday, 5 October 2014 at 21:25:39 UTC, Ali Çehreli wrote:
import std.stdio, std.cstream, std.parallelism, std.datetime,
       std.range, core.atomic;

void main()
{
    StopWatch sw;
    shared ulong sum1 = 0;
    ulong sum2 = 0, sum3 = 0, time1, time2, time3;

    enum numThreads = 4; // If numThreads is a variable then it
                         // significantly slows down the process
    ulong iter = 1000000L;
    // Force iter to be a multiple of the number of threads
    // so we can partition uniformly
    iter = numThreads * cast(ulong)(iter / numThreads);

    auto thds = parallel(iota(0, cast(uint)iter,
                              cast(uint)(iter / numThreads)));

    sw.reset(); sw.start();
    foreach (i; thds)
    {
        ulong s = 0;
        for (ulong k = 0; k < iter / numThreads; k++) { s += k; }
        s += i * iter / numThreads;
        atomicOp!"+="(sum1, s);
    }
    sw.stop(); time1 = sw.peek().usecs;

    sw.reset(); sw.start();
    for (ulong i = 0; i < iter; ++i) { sum2 += i; }
    sw.stop(); time2 = sw.peek().usecs;

    writefln("parallel sum      : %s, elapsed %s us", sum1, time1);
    writefln("single thread sum : %s, elapsed %s us", sum2, time2);
    if (time1 > 0) writefln("Efficiency : %s%%", 100 * time2 / time1);
    din.getc();
}
Playing around with the code above, it seems that whether numThreads is an enum or a runtime variable affects the execution time significantly: efficiency goes from below 100% with a variable to above 100% with the enum. A sketch of a possible workaround for the variable case follows the results below.
Results on a 4-core laptop with release builds, when numThreads is an enum:
parallel sum : 499999500000, elapsed 2469 us
single thread sum : 499999500000, elapsed 8054 us
Efficiency : 326%
When numThreads is an int (a runtime variable):
parallel sum : 499999500000, elapsed 21762 us
single thread sum : 499999500000, elapsed 8033 us
Efficiency : 36%
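
One plausible explanation is that with an enum the compiler can fold iter / numThreads into a constant for the inner-loop bound, while with a runtime variable that division stays in the loop condition. The following is only a minimal sketch of that idea (not a measured fix): it keeps numThreads as a runtime value but hoists the per-thread chunk size into an immutable local before the loop. The names chunk and sum are my own; timing and the second single-threaded pass are omitted.

// Sketch: numThreads as a runtime variable, with the chunk size
// hoisted out of the inner loop so the bound is a local constant.
import std.stdio, std.parallelism, std.range, core.atomic;

void main()
{
    ulong numThreads = 4;                      // runtime value, not an enum
    ulong iter = 1_000_000;
    iter = numThreads * (iter / numThreads);   // keep iter a multiple of numThreads

    immutable ulong chunk = iter / numThreads; // computed once, outside the loop

    shared ulong sum = 0;
    foreach (i; parallel(iota(0, cast(uint)iter, cast(uint)chunk)))
    {
        ulong s = 0;
        for (ulong k = 0; k < chunk; ++k)      // bound is an immutable local
            s += k;
        s += i * chunk;                        // offset for this chunk's start
        atomicOp!"+="(sum, s);
    }
    writefln("parallel sum : %s", sum);
}

If the slowdown with a variable numThreads does come from re-evaluating iter / numThreads in the inner loop, this variant should behave like the enum version; if it does not, the cause lies elsewhere (for example in how the work units get scheduled).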