On Fri, Nov 25, 2011 at 08:38:39AM +0100, Jakub Jelinek wrote:
> Furthermore, I'd prefer if the patch could be split into smaller parts,
> e.g. for bisecting purposes.  One patch would do the mutex changes
> to use new atomics, remove extra mutex.h headers and start using 0/1/-1
> instead of 0/1/2.  And another patch would rewrite the semaphores.

As requested, this is the semaphore rewrite.  The code is modelled
on the existing mutex implementation, using a single int to hold 31
bits of count info and a wait flag bit.  Like the mutexes, these
semaphores have the nice property that no syscalls are made unless
threads are waiting on the semaphore, and if we reach the contended
case, there is an orderly transition back to uncontended.  Also, I
believe this implementation uses the bare minimum of synchronization
instructions.

Unlike the old semaphore implementation, I do not try to wake multiple
threads in the sem_post_slow futex_wake call.  That's because it is
necessary to wake further threads if possible on exiting from
sem_wait_slow, and given that is necessary, then waking multiple
threads just leads to unnecessary futex_wake syscalls.  You can see
why the wake in sem_wait_slow is necessary by considering a semaphore
set up to allow two threads to work on some job.  If there were four
threads trying to get the semaphore, two would succeed, A and B, and
two, C and D, end up waiting in gomp_sem_wait_slow.  When A finishes
its job, it will call gomp_sem_post to increment the semaphore, see
and clear the wait flag, and call futex_wake on one thread.  If B also
finishes while this is happening, but before C has woken, then B's
call to gomp_sem_post may not see the wait flag set, leaving D asleep.

Incidentally, the regression I mentioned in
http://gcc.gnu.org/ml/gcc-patches/2011-11/msg02393.html wasn't real.
(I'd been testing for failures with a script that repeatedly hammered
selected libgomp tests with
while LD_LIBRARY_PATH=blahblah ./sometest; do :; done
because a single gcc testsuite run often doesn't catch locking
failures.  The regression was due to a typo on LD_LIBRARY_PATH.)

Bootstrapped and regression tested powerpc-linux.  OK to apply?

        PR libgomp/51249
        * config/linux/sem.h: Rewrite.
        * config/linux/sem.c: Rewrite.

Index: libgomp/config/linux/sem.h
===================================================================
--- libgomp/config/linux/sem.h  (revision 181718)
+++ libgomp/config/linux/sem.h  (working copy)
@@ -24,34 +24,74 @@
 
 /* This is a Linux specific implementation of a semaphore synchronization
    mechanism for libgomp.  This type is private to the library.  This 
-   implementation uses atomic instructions and the futex syscall.  */
+   counting semaphore implementation uses atomic instructions and the
+   futex syscall, and a single 32-bit int to store semaphore state.
+   The low 31 bits are the count, the top bit is a flag set when some
+   threads may be waiting.  */
 
 #ifndef GOMP_SEM_H
 #define GOMP_SEM_H 1
 
 typedef int gomp_sem_t;
 
-static inline void gomp_sem_init (gomp_sem_t *sem, int value)
+enum memmodel
+{
+  MEMMODEL_RELAXED = 0,
+  MEMMODEL_CONSUME = 1,
+  MEMMODEL_ACQUIRE = 2,
+  MEMMODEL_RELEASE = 3,
+  MEMMODEL_ACQ_REL = 4,
+  MEMMODEL_SEQ_CST = 5,
+  MEMMODEL_LAST = 6
+};
+
+extern void gomp_sem_wait_slow (gomp_sem_t *, int);
+extern void gomp_sem_post_slow (gomp_sem_t *);
+
+static inline void
+gomp_sem_init (gomp_sem_t *sem, int value)
 {
   *sem = value;
 }
 
-extern void gomp_sem_wait_slow (gomp_sem_t *);
-static inline void gomp_sem_wait (gomp_sem_t *sem)
+static inline void
+gomp_sem_destroy (gomp_sem_t *sem)
 {
-  if (!__sync_bool_compare_and_swap (sem, 1, 0))
-    gomp_sem_wait_slow (sem);
 }
 
-extern void gomp_sem_post_slow (gomp_sem_t *);
-static inline void gomp_sem_post (gomp_sem_t *sem)
+static inline void
+gomp_sem_wait (gomp_sem_t *sem)
 {
-  if (!__sync_bool_compare_and_swap (sem, 0, 1))
-    gomp_sem_post_slow (sem);
+  int count = *sem;
+
+  while ((count & 0x7fffffff) != 0)
+    {
+      int oldval = count;
+      __atomic_compare_exchange_4 (sem, &oldval, count - 1,
+                                  false, MEMMODEL_ACQUIRE, MEMMODEL_RELAXED);
+      if (__builtin_expect (oldval == count, 1))
+       return;
+      count = oldval;
+    }
+  gomp_sem_wait_slow (sem, count);
 }
 
-static inline void gomp_sem_destroy (gomp_sem_t *sem)
+static inline void
+gomp_sem_post (gomp_sem_t *sem)
 {
-}
+  int count = *sem;
+
+  while (1)
+    {
+      int oldval = count;
+      __atomic_compare_exchange_4 (sem, &oldval, (count + 1) & 0x7fffffff,
+                                  false, MEMMODEL_RELEASE, MEMMODEL_RELAXED);
+      if (__builtin_expect (oldval == count, 1))
+       break;
+      count = oldval;
+    }
 
+  if (__builtin_expect (count & 0x80000000, 0))
+    gomp_sem_post_slow (sem);
+}
 #endif /* GOMP_SEM_H */
Index: libgomp/config/linux/sem.c
===================================================================
--- libgomp/config/linux/sem.c  (revision 181718)
+++ libgomp/config/linux/sem.c  (working copy)
@@ -28,34 +28,69 @@
 
 #include "wait.h"
 
-
 void
-gomp_sem_wait_slow (gomp_sem_t *sem)
+gomp_sem_wait_slow (gomp_sem_t *sem, int count)
 {
-  while (1)
+  int oldval, newval;
+
+  /* First loop spins a while.  */
+  while (count == 0)
     {
-      int val = __sync_val_compare_and_swap (sem, 0, -1);
-      if (val > 0)
+      if (do_spin (sem, 0))
+       {
+         /* Spin timeout, nothing changed.  Set waiting flag.  */
+         oldval = 0;
+         __atomic_compare_exchange_4 (sem, &oldval, 0x80000000, false,
+                                      MEMMODEL_ACQUIRE, MEMMODEL_RELAXED);
+         count = oldval;
+         if (oldval == 0)
+           {
+             futex_wait (sem, 0x80000000);
+             count = *sem;
+           }
+         break;
+       }
+      /* Something changed.  If positive, we're good to go.  */
+      else if ((count = *sem) > 0)
        {
-         if (__sync_bool_compare_and_swap (sem, val, val - 1))
+         oldval = count;
+         __atomic_compare_exchange_4 (sem, &oldval, count - 1, false,
+                                      MEMMODEL_ACQUIRE, MEMMODEL_RELAXED);
+         if (oldval == count)
            return;
+         count = oldval;
        }
-      do_wait (sem, -1);
+    }
+
+  /* Second loop waits until semaphore is posted.  We always exit this
+     loop with wait flag set, so next post will awaken a thread.  */
+  while (1)
+    {
+      oldval = count;
+      newval = 0x80000000;
+      if ((count & 0x7fffffff) != 0)
+       newval |= count - 1;
+      __atomic_compare_exchange_4 (sem, &oldval, newval, false,
+                                  MEMMODEL_ACQUIRE, MEMMODEL_RELAXED);
+      if (oldval == count)
+       {
+         if ((count & 0x7fffffff) != 0)
+           {
+             /* If we can wake more threads, do so now.  */
+             if ((count & 0x7fffffff) > 1)
+               gomp_sem_post_slow (sem);
+             break;
+           }
+         do_wait (sem, 0x80000000);
+         count = *sem;
+       }
+      else
+       count = oldval;
     }
 }
 
 void
 gomp_sem_post_slow (gomp_sem_t *sem)
 {
-  int old, tmp = *sem, wake;
-
-  do
-    {
-      old = tmp;
-      wake = old > 0 ? old + 1 : 1;
-      tmp = __sync_val_compare_and_swap (sem, old, wake);
-    }
-  while (old != tmp);
-
-  futex_wake (sem, wake);
+  futex_wake (sem, 1);
 }

-- 
Alan Modra
Australia Development Lab, IBM

Reply via email to