#3553: parallel gc suffers badly if one thread is descheduled
---------------------------------+------------------------------------------
Reporter: simonmar | Owner:
Type: bug | Status: new
Priority: normal | Milestone: 6.12.2
Component: Runtime System | Version: 6.10.4
Severity: normal | Resolution:
Keywords: | Difficulty: Unknown
Testcase: | Os: Unknown/Multiple
Architecture: Unknown/Multiple |
---------------------------------+------------------------------------------
Comment (by simonmar):
I tried using condition variables, but they were significantly slower than
spinlocks. Experimental patch attached (or it would be, if it weren't so
large).
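For context, the barrier being replaced is built out of per-thread spin
locks and busy-wait loops (see the removed lines in the patch below); its
effect is roughly that of the following sketch (simplified, with made-up
names, not the actual RTS code):
{{{
/* Minimal busy-wait barrier sketch: the leader spins until every worker
 * has set its flag.  Names and the thread count are placeholders. */
#include <stdatomic.h>

#define N_GC_THREADS 8                     /* placeholder, not an RTS flag */

static atomic_int standing_by[N_GC_THREADS];

static void gc_worker_arrive(int i)        /* each worker announces itself */
{
    atomic_store(&standing_by[i], 1);
}

static void gc_leader_wait_all(void)       /* leader busy-waits for all */
{
    for (int i = 0; i < N_GC_THREADS; i++) {
        while (atomic_load(&standing_by[i]) == 0) {
            /* spin; nothing here yields the CPU */
        }
    }
}
}}}
With a flag per thread and nothing that yields, one thread losing its
timeslice leaves everyone else spinning until the scheduler brings it back,
which is the problem this ticket describes.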
{{{
Tue Oct 6 16:32:58 BST 2009 Simon Marlow <[email protected]>
* EXPERIMENT: use condition variables instead of spinlocks for the GC
barrier
This was significantly slower, averaging +20% with 7 cores on
nofib/parallel.
{
hunk ./rts/sm/GC.c 132
+#if defined(THREADED_RTS)
+Mutex gc_threads_ready_lock;
+Condition gc_threads_ready_cond;
+nat gc_threads_ready;
+
+Mutex gc_threads_go_lock;
+Condition gc_threads_go_cond;
+rtsBool gc_threads_go;
+#endif
+
hunk ./rts/sm/GC.c 885
- initSpinLock(&t->gc_spin);
- initSpinLock(&t->mut_spin);
- ACQUIRE_SPIN_LOCK(&t->gc_spin);
hunk ./rts/sm/GC.c 937
+
+ initMutex(&gc_threads_ready_lock);
+ initCondition(&gc_threads_ready_cond);
+ gc_threads_ready = 0;
+ initMutex(&gc_threads_go_lock);
+ initCondition(&gc_threads_go_cond);
+ gc_threads_go = rtsFalse;
hunk ./rts/sm/GC.c 1094
- // Wait until we're told to wake up
- RELEASE_SPIN_LOCK(&gct->mut_spin);
hunk ./rts/sm/GC.c 1095
+
+ // Wait until we're told to wake up
+ ACQUIRE_LOCK(&gc_threads_ready_lock);
+ gc_threads_ready++;
+ debugTrace(DEBUG_gc, "GC thread %d: gc_threads_ready = %d", gct->thread_index, gc_threads_ready);
+ if (gc_threads_ready >= RtsFlags.ParFlags.nNodes-1) {
+ signalCondition(&gc_threads_ready_cond);
+ }
+ RELEASE_LOCK(&gc_threads_ready_lock);
+
hunk ./rts/sm/GC.c 1106
- ACQUIRE_SPIN_LOCK(&gct->gc_spin);
+
+ ACQUIRE_LOCK(&gc_threads_go_lock);
+ while (!gc_threads_go) {
+ waitCondition(&gc_threads_go_cond, &gc_threads_go_lock);
+ }
+ RELEASE_LOCK(&gc_threads_go_lock);
hunk ./rts/sm/GC.c 1134
- // Wait until we're told to continue
- RELEASE_SPIN_LOCK(&gct->gc_spin);
hunk ./rts/sm/GC.c 1135
+
+ // Wait until we're told to continue
+ ACQUIRE_LOCK(&gc_threads_ready_lock);
+ gc_threads_ready++;
+ if (gc_threads_ready >= RtsFlags.ParFlags.nNodes-1) {
+ signalCondition(&gc_threads_ready_cond);
+ }
+ RELEASE_LOCK(&gc_threads_ready_lock);
+
hunk ./rts/sm/GC.c 1146
- ACQUIRE_SPIN_LOCK(&gct->mut_spin);
+
+ ACQUIRE_LOCK(&gc_threads_go_lock);
+ while (gc_threads_go) {
+ waitCondition(&gc_threads_go_cond, &gc_threads_go_lock);
+ }
+ RELEASE_LOCK(&gc_threads_go_lock);
+
hunk ./rts/sm/GC.c 1155
+ gct->wakeup = GC_THREAD_INACTIVE;
+
hunk ./rts/sm/GC.c 1169
- nat i, j;
+ nat i;
hunk ./rts/sm/GC.c 1172
- while(retry) {
- for (i=0; i < n_threads; i++) {
- if (i == me) continue;
- if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
- prodCapability(&capabilities[i], cap->running_task);
- }
- }
- for (j=0; j < 10000000; j++) {
- retry = rtsFalse;
- for (i=0; i < n_threads; i++) {
- if (i == me) continue;
- write_barrier();
- setContextSwitches();
- if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
- retry = rtsTrue;
- }
- }
- if (!retry) break;
+ for (i=0; i < n_threads; i++) {
+ if (i == me) continue;
+ if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
+ prodCapability(&capabilities[i], cap->running_task);
hunk ./rts/sm/GC.c 1178
+ setContextSwitches();
+
+ gc_threads_go = rtsFalse;
+
+ ACQUIRE_LOCK(&gc_threads_ready_lock);
+ while (gc_threads_ready < n_threads-1) {
+ debugTrace(DEBUG_gc, "waitForGcThreads: gc_threads_ready = %d", gc_threads_ready);
+ waitCondition(&gc_threads_ready_cond, &gc_threads_ready_lock);
+ }
+ gc_threads_ready = 0;
+ RELEASE_LOCK(&gc_threads_ready_lock);
hunk ./rts/sm/GC.c 1213
- ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
- RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
+// ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
+// RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
hunk ./rts/sm/GC.c 1216
+
+ ACQUIRE_LOCK(&gc_threads_go_lock);
+ gc_threads_go = rtsTrue;
+ broadcastCondition(&gc_threads_go_cond);
+ RELEASE_LOCK(&gc_threads_go_lock);
hunk ./rts/sm/GC.c 1231
- nat i;
- for (i=0; i < n_threads; i++) {
- if (i == me) continue;
- while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
+ ACQUIRE_LOCK(&gc_threads_ready_lock);
+ while (gc_threads_ready < n_threads-1) {
+ debugTrace(DEBUG_gc, "shutdown_gc_threads: %d", gc_threads_ready);
+ waitCondition(&gc_threads_ready_cond, &gc_threads_ready_lock);
hunk ./rts/sm/GC.c 1236
+ gc_threads_ready = 0;
+ RELEASE_LOCK(&gc_threads_ready_lock);
hunk ./rts/sm/GC.c 1243
-releaseGCThreads (Capability *cap USED_IF_THREADS)
+releaseGCThreads (Capability *cap STG_UNUSED)
hunk ./rts/sm/GC.c 1245
- nat n_threads = RtsFlags.ParFlags.nNodes;
- nat me = cap->no;
- nat i;
- for (i=0; i < n_threads; i++) {
- if (i == me) continue;
- if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
- barf("releaseGCThreads");
-
- gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
- ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
- RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
- }
+ debugTrace(DEBUG_gc, "releaseGCThreads");
+ ACQUIRE_LOCK(&gc_threads_go_lock);
+ gc_threads_go = rtsFalse;
+ broadcastCondition(&gc_threads_go_cond);
+ RELEASE_LOCK(&gc_threads_go_lock);
hunk ./rts/sm/GCThread.h 119
- SpinLock gc_spin;
- SpinLock mut_spin;
}
}}}
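Stripped of the RTS Mutex/Condition wrappers, the patch amounts to the usual
two-phase handshake with two mutex/condition pairs: each worker bumps a ready
count and signals the leader, then sleeps until the leader broadcasts "go".
A minimal pthreads sketch of the same protocol (placeholder names, not the
patch itself):
{{{
#include <pthread.h>

#define N_WORKERS 7   /* placeholder for RtsFlags.ParFlags.nNodes - 1 */

static pthread_mutex_t ready_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  ready_cond = PTHREAD_COND_INITIALIZER;
static int             n_ready    = 0;

static pthread_mutex_t go_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  go_cond = PTHREAD_COND_INITIALIZER;
static int             go      = 0;

/* Worker side: announce arrival, then sleep until told to go. */
static void worker_barrier(void)
{
    pthread_mutex_lock(&ready_lock);
    if (++n_ready >= N_WORKERS)
        pthread_cond_signal(&ready_cond);   /* last one in wakes the leader */
    pthread_mutex_unlock(&ready_lock);

    pthread_mutex_lock(&go_lock);
    while (!go)
        pthread_cond_wait(&go_cond, &go_lock);
    pthread_mutex_unlock(&go_lock);
}

/* Leader side: wait for everyone, then broadcast the go signal. */
static void leader_barrier(void)
{
    pthread_mutex_lock(&ready_lock);
    while (n_ready < N_WORKERS)
        pthread_cond_wait(&ready_cond, &ready_lock);
    n_ready = 0;                            /* reset for the next barrier */
    pthread_mutex_unlock(&ready_lock);

    pthread_mutex_lock(&go_lock);
    go = 1;
    pthread_cond_broadcast(&go_cond);
    pthread_mutex_unlock(&go_lock);
}
}}}
Each barrier crossing now costs mutex traffic plus a kernel wakeup for every
worker, which presumably accounts for the +20% relative to simply spinning
for the short time the barrier is normally held.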
--
Ticket URL: <http://hackage.haskell.org/trac/ghc/ticket/3553#comment:1>
GHC <http://www.haskell.org/ghc/>
The Glasgow Haskell Compiler