There was a nice blog-post about implementing low-lock singletons in D, here: http://davesdprogramming.wordpress.com/2013/05/06/low-lock-singletons/
One suggestion on Reddit was by dawgfoto (I think this is Martin Nowak?), to use atomic primitives instead: http://www.reddit.com/r/programming/comments/1droaa/lowlock_singletons_in_d_the_singleton_pattern/c9tmz07
I wanted to benchmark these different approaches. I was expecting Martin's implementation to be the fastest one, but on my machine (Athlon II X4 620 - 2.61GHz) the implementation in the blog post turns out to be the fastest one. I'm wondering whether my test case is flawed in some way. Btw, I think we should put an implementation of this into Phobos.
The timings on my machine:
Test time for LockSingleton: 542 msecs.
Test time for SyncSingleton: 20 msecs.
Test time for AtomicSingleton: 755 msecs.
Here's the code: http://codepad.org/TMb0xxYw And pasted below for convenience:
-----
module singleton;

import std.concurrency;
import core.atomic;
import core.thread;

/**
 * Baseline singleton: every call to get() enters the synchronized block,
 * whether or not the instance already exists.
 */
class LockSingleton
{
    /// Returns the single process-wide instance, creating it on first use.
    static LockSingleton get()
    {
        // __gshared gives one instance shared by all threads.
        __gshared LockSingleton _instance;

        synchronized
        {
            if (_instance is null)
                _instance = new LockSingleton;
        }

        return _instance;
    }

private:
    this() { }
}

/**
 * Singleton from the blog post: a thread-local flag records that this thread
 * has already been through the synchronized block, so later calls on the same
 * thread skip the lock.
 */
class SyncSingleton
{
    /// Returns the single process-wide instance, creating it on first use.
    static SyncSingleton get()
    {
        static bool _instantiated;  // tls
        __gshared SyncSingleton _instance;

        if (!_instantiated)
        {
            synchronized
            {
                if (_instance is null)
                    _instance = new SyncSingleton;

                _instantiated = true;
            }
        }

        return _instance;
    }

private:
    this() { }
}

/**
 * Singleton that guards the synchronized block with a flag shared between
 * threads, read with acquire ordering and written with release ordering.
 */
class AtomicSingleton
{
    /// Returns the single process-wide instance, creating it on first use.
    static AtomicSingleton get()
    {
        // `static` is required here: `shared` is only a type qualifier and,
        // unlike `__gshared`, does not give a function-local variable static
        // storage. Without `static` the flag is a fresh stack local that is
        // false on every call, so every call takes the lock.
        static shared bool _instantiated;
        __gshared AtomicSingleton _instance;

        // only enter synchronized block if not instantiated
        if (!atomicLoad!(MemoryOrder.acq)(_instantiated))
        {
            synchronized
            {
                if (_instance is null)
                    _instance = new AtomicSingleton;

                atomicStore!(MemoryOrder.rel)(_instantiated, true);
            }
        }

        return _instance;
    }
}

version (unittest)
{
    ulong _thread_call_count;  // TLS
}

// Benchmark: for each singleton class, spawn threadCount threads that each
// call get() 1024_000 times, wait for all of them, and print the elapsed time.
unittest
{
    import std.datetime;
    import std.stdio;
    import std.string;
    import std.typetuple;

    foreach (TestClass; TypeTuple!(LockSingleton, SyncSingleton, AtomicSingleton))
    {
        // mixin to avoid multiple definition errors
        mixin(q{
            static void test_%1$s()
            {
                foreach (i; 0 .. 1024_000)
                {
                    // just trying to avoid the compiler from doing dead-code optimization
                    _thread_call_count += (TestClass.get() !is null);
                }
            }

            auto sw = StopWatch(AutoStart.yes);

            enum threadCount = 4;
            foreach (i; 0 .. threadCount)
                spawn(&test_%1$s);

            thread_joinAll();
        }.format(TestClass.stringof));

        sw.stop();
        writefln("Test time for %s: %s msecs.", TestClass.stringof, sw.peek.msecs);
    }
}

void main() { }
-----
