I noticed that our MI __mp_lock (kernel lock, sched lock)
implementation is based on a ticket lock without any backoff.
The downside of this algorithm is that it results in increased bus traffic
because all the actors are writing (lock releaser) and reading (lock
waiters) the same memory region from different CPUs.  Effectively
reducing that contention with a proportional backoff doesn't look
easy, since we have to pick a minimum backoff time even though we
don't know how long the lock will actually be held.

Some algorithms (MCS, CLH, M-lock) avoid the sharing by building a queue
and letting waiters spin on a mostly private memory region.  I initially
tried the MCS lock and it gave good results on amd64 and riscv64, until
it broke on arm64 — which kind of confirms what I read in various
papers: it looks simple but it's hard to get right.

The M-lock implementation below is easy to understand and seems to give
results better than (or comparable to) both ticket lock and MCS on amd64,
arm64 and riscv64 machines.  False sharing is partly avoided at the
expense of growing the data structure, which should be fine since the
__mp_lock is only used for the kernel and sched lock.

I'm sending this out now to see if there is interest.  Benchmark
results would help confirm that.  I don't own the big machines where it
would really shine.


Index: kern/kern_lock.c
===================================================================
RCS file: /home/cvs/src/sys/kern/kern_lock.c,v
retrieving revision 1.72
diff -u -p -r1.72 kern_lock.c
--- kern/kern_lock.c    26 Apr 2022 15:31:14 -0000      1.72
+++ kern/kern_lock.c    26 Jun 2022 13:41:42 -0000
@@ -27,6 +27,7 @@
 
 #include <ddb/db_output.h>
 
+//#define MP_LOCKDEBUG
 #ifdef MP_LOCKDEBUG
 #ifndef DDB
 #error "MP_LOCKDEBUG requires DDB"
@@ -80,16 +81,28 @@ _kernel_lock_held(void)
 
 #ifdef __USE_MI_MPLOCK
 
-/* Ticket lock implementation */
+/*
+ * M-lock implementation from
+ * Design of a minimal-contention lock: M-lock Technical Report
+ * CS-PM-2018-08-19
+ * 
https://homes.cs.ru.ac.za/philip/Publications/Techreports/2018/M-lock-report/M-lock-report.pdf
+ */
 
 #include <machine/cpu.h>
 
 void
 ___mp_lock_init(struct __mp_lock *mpl, const struct lock_type *type)
 {
-       memset(mpl->mpl_cpus, 0, sizeof(mpl->mpl_cpus));
-       mpl->mpl_users = 0;
-       mpl->mpl_ticket = 1;
+       int     i;
+
+       memset(mpl, 0, sizeof(*mpl));
+       mpl->mpl_initial_lock.lockval = 0;
+       mpl->mpl_global_lock = &mpl->mpl_initial_lock;
+       for (i = 0; i < nitems(mpl->mpl_cpus); i++) {
+               mpl->mpl_cpus[i].lock_storage.lockval = 1;
+               mpl->mpl_cpus[i].mylock = &mpl->mpl_cpus[i].lock_storage;
+               mpl->mpl_cpus[i].spinon = &mpl->mpl_cpus[i].lock_storage;
+       }
 
 #ifdef WITNESS
        mpl->mpl_lock_obj.lo_name = type->lt_name;
@@ -105,7 +118,7 @@ ___mp_lock_init(struct __mp_lock *mpl, c
 }
 
 static __inline void
-__mp_lock_spin(struct __mp_lock *mpl, u_int me)
+__mp_lock_spin(struct __mp_lock *mpl, struct __mp_lock_cpu *lock)
 {
        struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
 #ifdef MP_LOCKDEBUG
@@ -113,7 +126,7 @@ __mp_lock_spin(struct __mp_lock *mpl, u_
 #endif
 
        spc->spc_spinning++;
-       while (mpl->mpl_ticket != me) {
+       while (lock->spinon->lockval != 0) {
                CPU_BUSY_CYCLE();
 
 #ifdef MP_LOCKDEBUG
@@ -140,17 +153,27 @@ __mp_lock(struct __mp_lock *mpl)
 #endif
 
        s = intr_disable();
-       if (cpu->mplc_depth++ == 0)
-               cpu->mplc_ticket = atomic_inc_int_nv(&mpl->mpl_users);
+       if (cpu->mplc_depth++ == 0) {
+               cpu->spinon = atomic_swap_ptr(&mpl->mpl_global_lock,
+                   cpu->mylock);
+       }
        intr_restore(s);
 
-       __mp_lock_spin(mpl, cpu->mplc_ticket);
+       __mp_lock_spin(mpl, cpu);
        membar_enter_after_atomic();
 
        WITNESS_LOCK(&mpl->mpl_lock_obj, LOP_EXCLUSIVE);
 }
 
 void
+__mp_lock_do_release(struct __mp_lock_cpu *lock)
+{
+       lock->mylock->lockval = 0;
+       lock->mylock = lock->spinon;
+       lock->mylock->lockval = 1; /* ready for next time */
+}
+
+void
 __mp_unlock(struct __mp_lock *mpl)
 {
        struct __mp_lock_cpu *cpu = &mpl->mpl_cpus[cpu_number()];
@@ -168,7 +191,7 @@ __mp_unlock(struct __mp_lock *mpl)
        s = intr_disable();
        if (--cpu->mplc_depth == 0) {
                membar_exit();
-               mpl->mpl_ticket++;
+               __mp_lock_do_release(cpu);
        }
        intr_restore(s);
 }
@@ -191,7 +214,7 @@ __mp_release_all(struct __mp_lock *mpl)
 #endif
        cpu->mplc_depth = 0;
        membar_exit();
-       mpl->mpl_ticket++;
+       __mp_lock_do_release(cpu);
        intr_restore(s);
 
        return (rv);
@@ -233,7 +256,7 @@ __mp_lock_held(struct __mp_lock *mpl, st
 {
        struct __mp_lock_cpu *cpu = &mpl->mpl_cpus[CPU_INFO_UNIT(ci)];
 
-       return (cpu->mplc_ticket == mpl->mpl_ticket && cpu->mplc_depth > 0);
+       return (cpu->spinon->lockval == 0 && cpu->mplc_depth > 0);
 }
 
 #endif /* __USE_MI_MPLOCK */
Index: sys/mplock.h
===================================================================
RCS file: /home/cvs/src/sys/sys/mplock.h,v
retrieving revision 1.13
diff -u -p -r1.13 mplock.h
--- sys/mplock.h        23 Apr 2019 13:35:12 -0000      1.13
+++ sys/mplock.h        25 Jun 2022 14:25:29 -0000
@@ -33,15 +33,21 @@
 
 #include <sys/_lock.h>
 
+typedef struct {
+       volatile u_int          lockval;
+} m_lock_t;
+
 struct __mp_lock_cpu {
-       u_int                   mplc_ticket;
-       u_int                   mplc_depth;
+       m_lock_t                 lock_storage __aligned(64);
+       m_lock_t                *mylock;
+       m_lock_t                *spinon;
+       u_int                    mplc_depth;
 };
 
 struct __mp_lock {
        struct __mp_lock_cpu    mpl_cpus[MAXCPUS];
-       volatile u_int          mpl_ticket;
-       u_int                   mpl_users;
+       m_lock_t                mpl_initial_lock;
+       m_lock_t                *mpl_global_lock;
 #ifdef WITNESS
        struct lock_object      mpl_lock_obj;
 #endif


-- 
jca | PGP : 0x1524E7EE / 5135 92C1 AD36 5293 2BDF  DDCC 0DFA 74AE 1524 E7EE

Reply via email to