#3553: parallel gc suffers badly if one thread is descheduled
---------------------------------+------------------------------------------
    Reporter:  simonmar          |        Owner:                  
        Type:  bug               |       Status:  new             
    Priority:  normal            |    Milestone:  6.12.2          
   Component:  Runtime System    |      Version:  6.10.4          
    Severity:  normal            |   Resolution:                  
    Keywords:                    |   Difficulty:  Unknown         
    Testcase:                    |           Os:  Unknown/Multiple
Architecture:  Unknown/Multiple  |  
---------------------------------+------------------------------------------
Comment (by simonmar):

 I tried using condition variables, but it was significantly slower than
 spinlocks.  Experimental patch attached (or would be, if it wasn't so
 large).

 {{{
 Tue Oct  6 16:32:58 BST 2009  Simon Marlow <[email protected]>
   * EXPERIMENT: use condition variables instead of spinlocks for the GC
 barrier
   This was significantly slower, averaging +20% with 7 cores on
   nofib/parallel.
     {
     hunk ./rts/sm/GC.c 132
     +#if defined(THREADED_RTS)
     +Mutex     gc_threads_ready_lock;
     +Condition gc_threads_ready_cond;
     +nat       gc_threads_ready;
     +
     +Mutex     gc_threads_go_lock;
     +Condition gc_threads_go_cond;
     +rtsBool   gc_threads_go;
     +#endif
     +
     hunk ./rts/sm/GC.c 885
     -    initSpinLock(&t->gc_spin);
     -    initSpinLock(&t->mut_spin);
     -    ACQUIRE_SPIN_LOCK(&t->gc_spin);
     hunk ./rts/sm/GC.c 937
     +
     +        initMutex(&gc_threads_ready_lock);
     +        initCondition(&gc_threads_ready_cond);
     +        gc_threads_ready = 0;
     +        initMutex(&gc_threads_go_lock);
     +        initCondition(&gc_threads_go_cond);
     +        gc_threads_go = rtsFalse;
     hunk ./rts/sm/GC.c 1094
     -    // Wait until we're told to wake up
     -    RELEASE_SPIN_LOCK(&gct->mut_spin);
     hunk ./rts/sm/GC.c 1095
     +
     +    // Wait until we're told to wake up
     +    ACQUIRE_LOCK(&gc_threads_ready_lock);
     +    gc_threads_ready++;
     +    debugTrace(DEBUG_gc, "GC thread %d: gc_threads_ready = %d",
 gct->thread_index, gc_threads_ready);
     +    if (gc_threads_ready >= RtsFlags.ParFlags.nNodes-1) {
     +        signalCondition(&gc_threads_ready_cond);
     +    }
     +    RELEASE_LOCK(&gc_threads_ready_lock);
     +
     hunk ./rts/sm/GC.c 1106
     -    ACQUIRE_SPIN_LOCK(&gct->gc_spin);
     +
     +    ACQUIRE_LOCK(&gc_threads_go_lock);
     +    while (!gc_threads_go) {
     +        waitCondition(&gc_threads_go_cond, &gc_threads_go_lock);
     +    }
     +    RELEASE_LOCK(&gc_threads_go_lock);
     hunk ./rts/sm/GC.c 1134
     -    // Wait until we're told to continue
     -    RELEASE_SPIN_LOCK(&gct->gc_spin);
     hunk ./rts/sm/GC.c 1135
     +
     +    // Wait until we're told to continue
     +    ACQUIRE_LOCK(&gc_threads_ready_lock);
     +    gc_threads_ready++;
     +    if (gc_threads_ready >= RtsFlags.ParFlags.nNodes-1) {
     +        signalCondition(&gc_threads_ready_cond);
     +    }
     +    RELEASE_LOCK(&gc_threads_ready_lock);
     +
     hunk ./rts/sm/GC.c 1146
     -    ACQUIRE_SPIN_LOCK(&gct->mut_spin);
     +
     +    ACQUIRE_LOCK(&gc_threads_go_lock);
     +    while (gc_threads_go) {
     +        waitCondition(&gc_threads_go_cond, &gc_threads_go_lock);
     +    }
     +    RELEASE_LOCK(&gc_threads_go_lock);
     +
     hunk ./rts/sm/GC.c 1155
     +    gct->wakeup = GC_THREAD_INACTIVE;
     +
     hunk ./rts/sm/GC.c 1169
     -    nat i, j;
     +    nat i;
     hunk ./rts/sm/GC.c 1172
     -    while(retry) {
     -        for (i=0; i < n_threads; i++) {
     -            if (i == me) continue;
     -            if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
     -                prodCapability(&capabilities[i], cap->running_task);
     -            }
     -        }
     -        for (j=0; j < 10000000; j++) {
     -            retry = rtsFalse;
     -            for (i=0; i < n_threads; i++) {
     -                if (i == me) continue;
     -                write_barrier();
     -                setContextSwitches();
     -                if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
     -                    retry = rtsTrue;
     -                }
     -            }
     -            if (!retry) break;
     +    for (i=0; i < n_threads; i++) {
     +        if (i == me) continue;
     +        if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
     +            prodCapability(&capabilities[i], cap->running_task);
     hunk ./rts/sm/GC.c 1178
     +    setContextSwitches();
     +    $
     +    gc_threads_go = rtsFalse;
     +    $
     +    ACQUIRE_LOCK(&gc_threads_ready_lock);
     +    while (gc_threads_ready < n_threads-1) {
     +        debugTrace(DEBUG_gc, "waitForGcThreads: gc_threads_ready =
 %d", gc_threads_ready);
     +        waitCondition(&gc_threads_ready_cond,
 &gc_threads_ready_lock);
     +    } $
     +    gc_threads_ready = 0;
     +    RELEASE_LOCK(&gc_threads_ready_lock);
     hunk ./rts/sm/GC.c 1213
     -        ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
     -        RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
     +//        ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
     +//        RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
     hunk ./rts/sm/GC.c 1216
     +
     +    ACQUIRE_LOCK(&gc_threads_go_lock);
     +    gc_threads_go = rtsTrue;
     +    broadcastCondition(&gc_threads_go_cond);
     +    RELEASE_LOCK(&gc_threads_go_lock);
     hunk ./rts/sm/GC.c 1231
     -    nat i;
     -    for (i=0; i < n_threads; i++) {
     -        if (i == me) continue;
     -        while (gc_threads[i]->wakeup !=
 GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
     +    ACQUIRE_LOCK(&gc_threads_ready_lock);
     +    while (gc_threads_ready < n_threads-1) {
     +        debugTrace(DEBUG_gc, "shutdown_gc_threads: %d",
 gc_threads_ready);
     +        waitCondition(&gc_threads_ready_cond,
 &gc_threads_ready_lock);
     hunk ./rts/sm/GC.c 1236
     +    gc_threads_ready = 0;
     +    RELEASE_LOCK(&gc_threads_ready_lock);
     hunk ./rts/sm/GC.c 1243
     -releaseGCThreads (Capability *cap USED_IF_THREADS)
     +releaseGCThreads (Capability *cap STG_UNUSED)
     hunk ./rts/sm/GC.c 1245
     -    nat n_threads = RtsFlags.ParFlags.nNodes;
     -    nat me = cap->no;
     -    nat i;
     -    for (i=0; i < n_threads; i++) {
     -        if (i == me) continue;
     -        if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) $
     -            barf("releaseGCThreads");
     -        $
     -        gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
     -        ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
     -        RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
     -    }
     +    debugTrace(DEBUG_gc, "releaseGCThreads");
     +    ACQUIRE_LOCK(&gc_threads_go_lock);
     +    gc_threads_go = rtsFalse;
     +    broadcastCondition(&gc_threads_go_cond);
     +    RELEASE_LOCK(&gc_threads_go_lock);
     hunk ./rts/sm/GCThread.h 119
     -    SpinLock   gc_spin;
     -    SpinLock   mut_spin;
     }
 }}}

-- 
Ticket URL: <http://hackage.haskell.org/trac/ghc/ticket/3553#comment:1>
GHC <http://www.haskell.org/ghc/>
The Glasgow Haskell Compiler
_______________________________________________
Glasgow-haskell-bugs mailing list
[email protected]
http://www.haskell.org/mailman/listinfo/glasgow-haskell-bugs

Reply via email to