Hello tech list,

If you have a macppc with more than one cpu, I would like you to try
this diff in the GENERIC.MP kernel.  I am running it on a dual G5
(without radeondrm and not running X).  I don't know whether I want to
commit this diff.

In late April, my G5's kernel froze very early during boot, while
trying to map the framebuffer.  The problem went away after reordering
and relinking the kernel.  I kept a copy of the bad kernel.  I found
the problem on Tuesday: __ppc_lock() crossed a page boundary.

$ nm -n /bsd.crash | grep __ppc_lock
$ objdump -dlr --start-address=0x27bf8c /bsd.crash | less

The disassembly had 0x27bf8c <= __ppc_lock < 0x27c058, so it crossed
pages at 0x27c000; page size = 0x1000 = 4096.  On a G5, the kernel
lazily faults in its own pages.  The page fault at 0x27c000 inside
__ppc_lock caused a recursive call to __ppc_lock.  I believe that the
fault happened here in __ppc_lock:

                s = ppc_intr_disable();
                if (atomic_cas_ulong(&mpl->mpl_count, 0, 1) == 0) {
                        membar_enter();
                        mpl->mpl_cpu = curcpu();
                }

                if (mpl->mpl_cpu == curcpu()) {
//-->                   // page fault!  recursive call to __ppc_lock
                        mpl->mpl_count++;
                        ppc_intr_enable(s);

This is bad, because the lock is not in a valid state when the page
fault happens.  The code uses ppc_intr_disable() to protect the lock,
but disabling interrupts doesn't prevent page faults.

The lock is a recursive spinlock that protects the pmap's hash table.
My bad kernel tried to grab the lock to insert the first page of the
framebuffer into the hash, and then tried to recursively grab the lock
to insert page 0x27c000 into the hash.  I debugged the problem by
copying the bad kernel and overwriting some asm to insert some extra
printf()s.  The problem went away when my asm caused an earlier access
to page 0x27c000.

When I reorder the kernel, __ppc_lock gets a different address and
probably no longer crosses a page boundary while the lock is in an
invalid state; but I can force a page fault this way:

                        __asm volatile("b 1f; . = . + 4096; 1:");

If I insert this asm at the "//-->" marker above and compile GENERIC.MP,
then it reproduces the freezing problem on my G5.
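
For reference, here is roughly where the reproducer goes; this is the
excerpt from above with the asm inserted at the "//-->" marker
(illustrative only, not part of the diff):

                if (mpl->mpl_cpu == curcpu()) {
                        /* Branch over a 4096-byte hole in the text, so
                         * the next instruction lands on another page. */
                        __asm volatile("b 1f; . = . + 4096; 1:");
                        mpl->mpl_count++;
                        ppc_intr_enable(s);
                        break;
                }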

The problem doesn't happen on a G3 or G4, where the kernel uses block
address translation (!ppc_nobat); I copied my bad kernel to a G4 and
it didn't freeze there.  The problem doesn't happen in bsd.sp, where
the lock doesn't exist.  Our powerpc64 kernel doesn't fault in its own
pages this way.  I have observed the problem only with macppc on G5.

In this diff, I try to fix the problem by shrinking the lock to 32
bits, and using 32-bit atomic ops to keep the lock in a valid state.
This __ppc_lock is no longer used as the __mp_lock; it is now only the
pmap's hash lock, so I also delete some unused functions.
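
To sketch how the packing in the diff below keeps the lock valid: the
owner's cpuid goes in the top 8 bits of the 32-bit "bolt" and the
recursion count in the low 24 bits, so a single 32-bit store or
compare-and-swap updates both fields at once.  Some example values
(illustrative only):

        BOLT(2, 3)              == 0x02000003  /* cpu 2 holds it, 3 deep */
        BOLT_CPUID(0x02000003)  == 2
        BOLT_COUNT(0x02000003)  == 3
        BOLT(0, 0)              == 0           /* unlocked */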

--George

Index: arch/powerpc/include/mplock.h
===================================================================
RCS file: /cvs/src/sys/arch/powerpc/include/mplock.h,v
retrieving revision 1.4
diff -u -p -r1.4 mplock.h
--- arch/powerpc/include/mplock.h       15 Apr 2020 08:09:00 -0000      1.4
+++ arch/powerpc/include/mplock.h       6 May 2021 20:01:08 -0000
@@ -30,13 +30,13 @@
 #define __USE_MI_MPLOCK
 
 /*
+ * __ppc_lock exists because pte_spill_r() can't use __mp_lock.
  * Really simple spinlock implementation with recursive capabilities.
  * Correctness is paramount, no fancyness allowed.
  */
 
 struct __ppc_lock {
-       volatile struct cpu_info *mpl_cpu;
-       volatile long           mpl_count;
+       volatile unsigned int   mpl_bolt;
 };
 
 #ifndef _LOCORE
@@ -44,10 +44,6 @@ struct __ppc_lock {
 void __ppc_lock_init(struct __ppc_lock *);
 void __ppc_lock(struct __ppc_lock *);
 void __ppc_unlock(struct __ppc_lock *);
-int __ppc_release_all(struct __ppc_lock *);
-int __ppc_release_all_but_one(struct __ppc_lock *);
-void __ppc_acquire_count(struct __ppc_lock *, int);
-int __ppc_lock_held(struct __ppc_lock *, struct cpu_info *);
 
 #endif
 
Index: arch/powerpc/powerpc/lock_machdep.c
===================================================================
RCS file: /cvs/src/sys/arch/powerpc/powerpc/lock_machdep.c,v
retrieving revision 1.9
diff -u -p -r1.9 lock_machdep.c
--- arch/powerpc/powerpc/lock_machdep.c 15 Apr 2020 08:09:00 -0000      1.9
+++ arch/powerpc/powerpc/lock_machdep.c 6 May 2021 20:01:08 -0000
@@ -1,6 +1,7 @@
 /*     $OpenBSD: lock_machdep.c,v 1.9 2020/04/15 08:09:00 mpi Exp $    */
 
 /*
+ * Copyright (c) 2021 George Koehler <gkoeh...@openbsd.org>
  * Copyright (c) 2007 Artur Grabowski <a...@openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
@@ -22,15 +23,29 @@
 #include <sys/atomic.h>
 
 #include <machine/cpu.h>
-#include <machine/psl.h>
 
 #include <ddb/db_output.h>
 
+/*
+ * If __ppc_lock() crosses a page boundary in the kernel text, then it
+ * may cause a page fault (on G5 with ppc_nobat), and pte_spill_r()
+ * would recursively call __ppc_lock().  The lock must be in a valid
+ * state when the page fault happens.
+ *
+ * This lock has 2 fields, "cpuid" and "count", packed in a 32-bit
+ * "bolt", so we can use 32-bit atomic ops to ensure that our lock is
+ * always in a valid state.
+ */
+#define BOLT(cpuid, count)     ((cpuid) << 24 | (count) & 0x00ffffff)
+#define BOLT_CPUID(bolt)       ((bolt) >> 24)
+#define BOLT_COUNT(bolt)       ((bolt) & 0x00ffffff)
+#define BOLT_LOCKED(bolt)      (BOLT_COUNT(bolt) != 0)
+#define BOLT_UNLOCKED(bolt)    (BOLT_COUNT(bolt) == 0)
+
 void
 __ppc_lock_init(struct __ppc_lock *lock)
 {
-       lock->mpl_cpu = NULL;
-       lock->mpl_count = 0;
+       lock->mpl_bolt = 0;
 }
 
 #if defined(MP_LOCKDEBUG)
@@ -46,12 +61,12 @@ static __inline void
 __ppc_lock_spin(struct __ppc_lock *mpl)
 {
 #ifndef MP_LOCKDEBUG
-       while (mpl->mpl_count != 0)
+       while (BOLT_LOCKED(mpl->mpl_bolt))
                CPU_BUSY_CYCLE();
 #else
        int nticks = __mp_lock_spinout;
 
-       while (mpl->mpl_count != 0 && --nticks > 0)
+       while (BOLT_LOCKED(mpl->mpl_bolt) && --nticks > 0)
                CPU_BUSY_CYCLE();
 
        if (nticks == 0) {
@@ -64,33 +79,21 @@ __ppc_lock_spin(struct __ppc_lock *mpl)
 void
 __ppc_lock(struct __ppc_lock *mpl)
 {
-       /*
-        * Please notice that mpl_count gets incremented twice for the
-        * first lock. This is on purpose. The way we release the lock
-        * in mp_unlock is to decrement the mpl_count and then check if
-        * the lock should be released. Since mpl_count is what we're
-        * spinning on, decrementing it in mpl_unlock to 0 means that
-        * we can't clear mpl_cpu, because we're no longer holding the
-        * lock. In theory mpl_cpu doesn't need to be cleared, but it's
-        * safer to clear it and besides, setting mpl_count to 2 on the
-        * first lock makes most of this code much simpler.
-        */
+       int id = curcpu()->ci_cpuid;
 
        while (1) {
-               int s;
-
-               s = ppc_intr_disable();
-               if (atomic_cas_ulong(&mpl->mpl_count, 0, 1) == 0) {
-                       membar_enter();
-                       mpl->mpl_cpu = curcpu();
-               }
+               unsigned int bolt = mpl->mpl_bolt;
 
-               if (mpl->mpl_cpu == curcpu()) {
-                       mpl->mpl_count++;
-                       ppc_intr_enable(s);
+               if (BOLT_UNLOCKED(bolt)) {
+                       /* Try to grab the lock.  count = 1 */
+                       if (atomic_cas_uint(&mpl->mpl_bolt, bolt,
+                           BOLT(id, 1)) == bolt)
+                               break;
+               } else if (BOLT_CPUID(bolt) == id) {
+                       /* We hold the lock.  count++ */
+                       mpl->mpl_bolt = BOLT(id, BOLT_COUNT(bolt) + 1);
                        break;
                }
-               ppc_intr_enable(s);
 
                __ppc_lock_spin(mpl);
        }
@@ -99,72 +102,16 @@ __ppc_lock(struct __ppc_lock *mpl)
 void
 __ppc_unlock(struct __ppc_lock *mpl)
 {
-       int s;
+       unsigned int bolt = mpl->mpl_bolt;
 
 #ifdef MP_LOCKDEBUG
-       if (mpl->mpl_cpu != curcpu()) {
+       if (BOLT_CPUID(bolt) != curcpu()->ci_cpuid ||
+           BOLT_COUNT(bolt) == 0) {
                db_printf("__ppc_unlock(%p): not held lock\n", mpl);
                db_enter();
        }
 #endif
 
-       s = ppc_intr_disable();
-       if (--mpl->mpl_count == 1) {
-               mpl->mpl_cpu = NULL;
-               membar_exit();
-               mpl->mpl_count = 0;
-       }
-       ppc_intr_enable(s);
-}
-
-int
-__ppc_release_all(struct __ppc_lock *mpl)
-{
-       int rv = mpl->mpl_count - 1;
-       int s;
-
-#ifdef MP_LOCKDEBUG
-       if (mpl->mpl_cpu != curcpu()) {
-               db_printf("__ppc_release_all(%p): not held lock\n", mpl);
-               db_enter();
-       }
-#endif
-
-       s = ppc_intr_disable();
-       mpl->mpl_cpu = NULL;
-       membar_exit();
-       mpl->mpl_count = 0;
-       ppc_intr_enable(s);
-
-       return (rv);
-}
-
-int
-__ppc_release_all_but_one(struct __ppc_lock *mpl)
-{
-       int rv = mpl->mpl_count - 2;
-
-#ifdef MP_LOCKDEBUG
-       if (mpl->mpl_cpu != curcpu()) {
-               db_printf("__ppc_release_all_but_one(%p): not held lock\n", mpl);
-               db_enter();
-       }
-#endif
-
-       mpl->mpl_count = 2;
-
-       return (rv);
-}
-
-void
-__ppc_acquire_count(struct __ppc_lock *mpl, int count)
-{
-       while (count--)
-               __ppc_lock(mpl);
-}
-
-int
-__ppc_lock_held(struct __ppc_lock *mpl, struct cpu_info *ci)
-{
-       return mpl->mpl_cpu == ci;
+       /* count-- */
+       mpl->mpl_bolt = BOLT(BOLT_CPUID(bolt), BOLT_COUNT(bolt) - 1);
 }
