On Monday, 4 March 2024 at 18:08:52 UTC, Andy Valencia wrote:
For any other newbie dlang voyagers, here's a version which
works as expected using the system memory allocator. On my
little i7 I get 1.48 secs wallclock with 5.26 CPU seconds.
...
Using a technique I found in a unit test in std/concurrency.d, I
managed to share process memory without GC. It counted up to
1,000,000,000 on my low-end i7 in:
real 0m15.666s
user 0m59.913s
sys 0m0.004s
import core.atomic : atomicFetchAdd;
import std.concurrency : spawn, send, receiveOnly, ownerTid;
import core.thread : Thread;
// Total number of atomic increments performed across all workers.
const uint NSWEPT = 1_000_000_000;
// Number of worker threads spawned to share the counting.
const uint NCPU = 4;
// Each worker performs NSWEPT / NCPU increments (integer division), so the
// grand total only equals NSWEPT when the work divides evenly.
static assert(NCPU > 0 && NSWEPT % NCPU == 0,
    "NSWEPT must be an exact, non-zero multiple of NCPU");
/**
 * Worker body: waits for a message holding a `shared(int)[]`, atomically
 * increments its first element NSWEPT / NCPU times, then sends `true` to the
 * owner thread to signal completion.
 */
void doadd()
{
    // Block until the shared counter slice arrives in this thread's mailbox.
    auto counter = receiveOnly!(shared(int)[]);

    // This worker's portion of the total increment count.
    immutable uint share = NSWEPT / NCPU;
    foreach (_; 0 .. share)
    {
        atomicFetchAdd(counter[0], 1);
    }

    // Notify the owner that this worker has finished.
    send(ownerTid, true);
}
/**
 * Entry point: spawns NCPU workers running doadd, hands each the same
 * one-element shared counter slice, waits for one completion message per
 * worker, and then asserts that the counter reached NSWEPT.
 */
void main()
{
    // One-element shared slice used as the common counter.
    static shared int[] counter = new shared(int)[1];

    // Start the workers and pass each one the counter slice.
    foreach (worker; 0 .. NCPU)
    {
        spawn(&doadd).send(counter);
    }

    // Collect one completion message from every worker.
    foreach (worker; 0 .. NCPU)
    {
        receiveOnly!bool();
    }

    // All workers have reported in; the counter must hold the full total.
    assert(counter[0] == NSWEPT);
}