On Thu, 13 May 2021 02:20:45 -0400 George Koehler <kern...@gmail.com> wrote:
> My last diff (11 May 2021) still has a potential problem with memory
> barriers. I will mail a new diff if I think of a fix.

Here's my new diff.  It is a copy of what I sent to ppc@ a moment ago.
I would ask, "Is it ok to commit?", but while writing this mail, I
found a 2nd potential problem with memory barriers, so I will need to
edit this diff yet again.

I add a 2nd membar_enter() to __ppc_lock() to avoid my 1st "potential
problem".  I also simplify the code by changing __ppc_lock_spin to
check the owning cpu, not the count.

A moment ago, I sent a copy of this diff to ppc@, in reply to another
report of bsd.mp hanging at "using parent ..." on a G5.  My own hang
happened when the function __ppc_lock(), in a randomly reordered
bsd.mp, crossed a page boundary and caused a page fault with a
recursive call to __ppc_lock().  The goal of my diff is to prevent
such hangs by allowing such recursive calls to work.

I acquire the lock with an atomic 32-bit write, so the lock is always
in a valid state before or after the write.  The old code spun until
mpl_count == 0.  In my earlier diffs, I acquired the lock by setting
both the owning cpu and a nonzero count in one write, so I needed to
pack both owner and count in a 32-bit integer.

This diff spins until mpl_cpu == NULL.  I acquire the lock by setting
only mpl_cpu and leaving mpl_count at 0.  So mpl_cpu is the spinlock,
and mpl_count is a variable protected by the lock.  I no longer need
to pack both fields in a 32-bit integer, so I get rid of my BOLT*
macros for packing and unpacking the fields.

I need 2 memory barriers:

 1. after acquiring the lock, before accessing the locked data.
 2. after accessing the locked data, before releasing the lock.

I added the 2nd membar_enter() because I feared that a cpu would
acquire the lock, but get a page fault (and recursive call) before it
would reach the memory barrier.
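In case a sketch helps review: the acquire/release protocol described
above can be modeled in user space, with C11 atomics standing in for
the kernel's atomic_cas_ptr(), membar_enter(), and membar_exit().
Everything here (ppc_lock_t, struct cpu, the function names) is my
illustration for this mail, not the kernel code itself; note how each
acquire and each release is a single atomic pointer write, so the lock
is valid at every instant in between.

```c
#include <assert.h>
#include <stdatomic.h>
#include <stddef.h>

/* Stand-in for struct cpu_info; one per "cpu". */
struct cpu { int id; };

/* mpl_cpu is the spinlock; mpl_count is data protected by the lock. */
typedef struct {
	_Atomic(struct cpu *) mpl_cpu;
	long mpl_count;
} ppc_lock_t;

static void
ppc_lock(ppc_lock_t *mpl, struct cpu *ci)
{
	for (;;) {
		struct cpu *owner = atomic_load(&mpl->mpl_cpu);
		struct cpu *expect = NULL;

		if (owner == NULL) {
			/* Try to acquire: one atomic pointer write. */
			if (atomic_compare_exchange_strong(&mpl->mpl_cpu,
			    &expect, ci)) {
				/* barrier #1: before touching locked data */
				atomic_thread_fence(memory_order_acquire);
				break;
			}
		} else if (owner == ci) {
			/* Recursive entry; count stays behind the lock. */
			atomic_thread_fence(memory_order_acquire);
			mpl->mpl_count++;
			break;
		}
		/* else: spin until mpl_cpu == NULL */
	}
}

static void
ppc_unlock(ppc_lock_t *mpl, struct cpu *ci)
{
	assert(atomic_load(&mpl->mpl_cpu) == ci);
	if (mpl->mpl_count == 0) {
		/* barrier #2, then one atomic pointer write releases. */
		atomic_thread_fence(memory_order_release);
		atomic_store(&mpl->mpl_cpu, NULL);
	} else
		mpl->mpl_count--;
}
```

A recursive acquire here only bumps mpl_count, exactly as in the diff,
so a page-fault-style re-entry between the CAS and the barrier still
sees a consistently owned lock.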
I didn't think of the opposite case, where a cpu would do
membar_exit(), but get a page fault before it would release the lock.
This is the 2nd potential problem that I didn't fix.  I didn't observe
an actual problem after running this diff for more than 24 hours.

--George

Index: sys/arch/powerpc/include/mplock.h
===================================================================
RCS file: /cvs/src/sys/arch/powerpc/include/mplock.h,v
retrieving revision 1.4
diff -u -p -r1.4 mplock.h
--- sys/arch/powerpc/include/mplock.h	15 Apr 2020 08:09:00 -0000	1.4
+++ sys/arch/powerpc/include/mplock.h	16 May 2021 00:51:41 -0000
@@ -30,13 +30,14 @@
 #define __USE_MI_MPLOCK
 
 /*
+ * __ppc_lock exists because pte_spill_r() can't use __mp_lock.
  * Really simple spinlock implementation with recursive capabilities.
  * Correctness is paramount, no fancyness allowed.
  */
 struct __ppc_lock {
-	volatile struct cpu_info *mpl_cpu;
-	volatile long mpl_count;
+	struct cpu_info *volatile mpl_cpu;
+	long mpl_count;
 };
 
 #ifndef _LOCORE
@@ -44,10 +45,6 @@ struct __ppc_lock {
 void __ppc_lock_init(struct __ppc_lock *);
 void __ppc_lock(struct __ppc_lock *);
 void __ppc_unlock(struct __ppc_lock *);
-int __ppc_release_all(struct __ppc_lock *);
-int __ppc_release_all_but_one(struct __ppc_lock *);
-void __ppc_acquire_count(struct __ppc_lock *, int);
-int __ppc_lock_held(struct __ppc_lock *, struct cpu_info *);
 
 #endif

Index: sys/arch/powerpc/powerpc/lock_machdep.c
===================================================================
RCS file: /cvs/src/sys/arch/powerpc/powerpc/lock_machdep.c,v
retrieving revision 1.9
diff -u -p -r1.9 lock_machdep.c
--- sys/arch/powerpc/powerpc/lock_machdep.c	15 Apr 2020 08:09:00 -0000	1.9
+++ sys/arch/powerpc/powerpc/lock_machdep.c	16 May 2021 00:51:41 -0000
@@ -1,6 +1,7 @@
 /*	$OpenBSD: lock_machdep.c,v 1.9 2020/04/15 08:09:00 mpi Exp $	*/
 
 /*
+ * Copyright (c) 2021 George Koehler <gkoeh...@openbsd.org>
  * Copyright (c) 2007 Artur Grabowski <a...@openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
@@ -22,10 +23,21 @@
 
 #include <sys/atomic.h>
 #include <machine/cpu.h>
-#include <machine/psl.h>
 
 #include <ddb/db_output.h>
 
+/*
+ * If __ppc_lock() crosses a page boundary in the kernel text, then it
+ * may cause a page fault (on G5 with ppc_nobat), and pte_spill_r()
+ * would recursively call __ppc_lock().  The lock must be in a valid
+ * state when the page fault happens.  We acquire or release the lock
+ * with a 32-bit atomic write to mpl_cpu, so the lock is always in a
+ * valid state, before or after the write.
+ *
+ * Acquired the lock:	mpl->mpl_cpu == curcpu()
+ * Released the lock:	mpl->mpl_cpu == NULL
+ */
+
 void
 __ppc_lock_init(struct __ppc_lock *lock)
 {
@@ -46,12 +58,12 @@ static __inline void
 __ppc_lock_spin(struct __ppc_lock *mpl)
 {
 #ifndef MP_LOCKDEBUG
-	while (mpl->mpl_count != 0)
+	while (mpl->mpl_cpu != NULL)
 		CPU_BUSY_CYCLE();
 #else
 	int nticks = __mp_lock_spinout;
 
-	while (mpl->mpl_count != 0 && --nticks > 0)
+	while (mpl->mpl_cpu != NULL && --nticks > 0)
 		CPU_BUSY_CYCLE();
 
 	if (nticks == 0) {
@@ -65,32 +77,33 @@ void
 __ppc_lock(struct __ppc_lock *mpl)
 {
 	/*
-	 * Please notice that mpl_count gets incremented twice for the
-	 * first lock. This is on purpose. The way we release the lock
-	 * in mp_unlock is to decrement the mpl_count and then check if
-	 * the lock should be released. Since mpl_count is what we're
-	 * spinning on, decrementing it in mpl_unlock to 0 means that
-	 * we can't clear mpl_cpu, because we're no longer holding the
-	 * lock. In theory mpl_cpu doesn't need to be cleared, but it's
-	 * safer to clear it and besides, setting mpl_count to 2 on the
-	 * first lock makes most of this code much simpler.
+	 * Please notice that mpl_count stays at 0 for the first lock.
+	 * A page fault might recursively call __ppc_lock() after we
+	 * set mpl_cpu, but before we can increase mpl_count.
+	 *
+	 * After we acquire the lock, we need a "bc; isync" memory
+	 * barrier, but we might not reach the barrier before the next
+	 * page fault.  Then the fault's recursive __ppc_lock() must
+	 * have a barrier.  membar_enter() is just "isync" and must
+	 * come after a conditional branch for holding the lock.
 	 */
 
 	while (1) {
-		int s;
+		struct cpu_info *owner = mpl->mpl_cpu;
+		struct cpu_info *ci = curcpu();
 
-		s = ppc_intr_disable();
-		if (atomic_cas_ulong(&mpl->mpl_count, 0, 1) == 0) {
+		if (owner == NULL) {
+			/* Try to acquire the lock. */
+			if (atomic_cas_ptr(&mpl->mpl_cpu, NULL, ci) == NULL) {
+				membar_enter();
+				break;
+			}
+		} else if (owner == ci) {
+			/* We hold the lock, but might need a barrier. */
 			membar_enter();
-			mpl->mpl_cpu = curcpu();
-		}
-
-		if (mpl->mpl_cpu == curcpu()) {
 			mpl->mpl_count++;
-			ppc_intr_enable(s);
 			break;
 		}
-		ppc_intr_enable(s);
 
 		__ppc_lock_spin(mpl);
 	}
@@ -99,8 +112,6 @@ __ppc_lock(struct __ppc_lock *mpl)
 void
 __ppc_unlock(struct __ppc_lock *mpl)
 {
-	int s;
-
 #ifdef MP_LOCKDEBUG
 	if (mpl->mpl_cpu != curcpu()) {
 		db_printf("__ppc_unlock(%p): not held lock\n", mpl);
@@ -108,63 +119,10 @@ __ppc_unlock(struct __ppc_lock *mpl)
 	}
 #endif
 
-	s = ppc_intr_disable();
-	if (--mpl->mpl_count == 1) {
-		mpl->mpl_cpu = NULL;
+	if (mpl->mpl_count == 0) {
+		/* Release the lock. */
 		membar_exit();
-		mpl->mpl_count = 0;
-	}
-	ppc_intr_enable(s);
-}
-
-int
-__ppc_release_all(struct __ppc_lock *mpl)
-{
-	int rv = mpl->mpl_count - 1;
-	int s;
-
-#ifdef MP_LOCKDEBUG
-	if (mpl->mpl_cpu != curcpu()) {
-		db_printf("__ppc_release_all(%p): not held lock\n", mpl);
-		db_enter();
-	}
-#endif
-
-	s = ppc_intr_disable();
-	mpl->mpl_cpu = NULL;
-	membar_exit();
-	mpl->mpl_count = 0;
-	ppc_intr_enable(s);
-
-	return (rv);
-}
-
-int
-__ppc_release_all_but_one(struct __ppc_lock *mpl)
-{
-	int rv = mpl->mpl_count - 2;
-
-#ifdef MP_LOCKDEBUG
-	if (mpl->mpl_cpu != curcpu()) {
-		db_printf("__ppc_release_all_but_one(%p): not held lock\n", mpl);
-		db_enter();
-	}
-#endif
-
-	mpl->mpl_count = 2;
-
-	return (rv);
-}
-
-void
-__ppc_acquire_count(struct __ppc_lock *mpl, int count)
-{
-	while (count--)
-		__ppc_lock(mpl);
-}
-
-int
-__ppc_lock_held(struct __ppc_lock *mpl, struct cpu_info *ci)
-{
-	return mpl->mpl_cpu == ci;
+		mpl->mpl_cpu = NULL;
+	} else
+		mpl->mpl_count--;
 }
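The diff itself only runs inside the kernel, but the core claim — that
the owner pointer alone is a correct spinlock — can be checked in user
space.  Below is a hypothetical pthread exercise of the same
CAS-on-owner scheme; all names are mine, C11 seq_cst atomics stand in
for atomic_cas_ptr() and the membar_* calls, and the recursion path is
omitted since user-space threads don't take nested page faults.

```c
#include <pthread.h>
#include <stdatomic.h>
#include <stddef.h>

#define NTHREADS 4
#define NITER 100000

/* owner plays the role of mpl_cpu: NULL means unlocked. */
static _Atomic(void *) owner;
static long shared;		/* data the lock protects */

static void
spin_lock(void *self)
{
	void *expect;

	for (;;) {
		expect = NULL;
		/* One atomic pointer write acquires the lock;
		 * the seq_cst CAS doubles as the acquire barrier. */
		if (atomic_compare_exchange_weak(&owner, &expect, self))
			break;
		/* else: spin until owner == NULL */
	}
}

static void
spin_unlock(void)
{
	/* One atomic (seq_cst, hence release) pointer write unlocks. */
	atomic_store(&owner, NULL);
}

static void *
worker(void *arg)
{
	for (int i = 0; i < NITER; i++) {
		spin_lock(arg);		/* arg is this thread's identity */
		shared++;		/* would race without the lock */
		spin_unlock();
	}
	return NULL;
}
```

If the lock excluded nothing, the unsynchronized `shared++` would lose
updates under contention; with it, all NTHREADS * NITER increments
survive.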