I noticed that our MI __mp_lock (kernel lock, sched lock) implementation is based on a ticket lock without any backoff. The downside of this algorithm is that it results in increased bus traffic, because all the actors are writing (lock releaser) and reading (lock waiters) the same memory region from different CPUs. Effectively reducing that contention with a proportional backoff doesn't look easy, since we would have to target the minimum backoff time even though we don't know how long the lock will actually be held.
Some algorithms (MCS, CLH, M-lock) avoid the sharing by building a queue and letting waiters spin on a mostly private memory region. I initially tried the MCS lock and it gave good results on amd64 and riscv64, until it broke on arm64. Which kinda confirms what I read in various papers: it looks simple but it's hard to get right. The M-lock implementation below is easy to understand and seems to give better (or similar) results than both ticket lock and MCS on amd64, arm64 and riscv64 machines. False sharing is partly avoided at the expense of growing the data structure, which should be fine since the __mp_lock is only used for the kernel and sched lock. I'm showing this off right now to see if there is interest. Benchmarks results would help confirm that. I don't own big machines where it would really shine. Index: kern/kern_lock.c =================================================================== RCS file: /home/cvs/src/sys/kern/kern_lock.c,v retrieving revision 1.72 diff -u -p -r1.72 kern_lock.c --- kern/kern_lock.c 26 Apr 2022 15:31:14 -0000 1.72 +++ kern/kern_lock.c 26 Jun 2022 13:41:42 -0000 @@ -27,6 +27,7 @@ #include <ddb/db_output.h> +//#define MP_LOCKDEBUG #ifdef MP_LOCKDEBUG #ifndef DDB #error "MP_LOCKDEBUG requires DDB" @@ -80,16 +81,28 @@ _kernel_lock_held(void) #ifdef __USE_MI_MPLOCK -/* Ticket lock implementation */ +/* + * M-lock implementation from + * Design of a minimal-contention lock: M-lock Technical Report + * CS-PM-2018-08-19 + * https://homes.cs.ru.ac.za/philip/Publications/Techreports/2018/M-lock-report/M-lock-report.pdf + */ #include <machine/cpu.h> void ___mp_lock_init(struct __mp_lock *mpl, const struct lock_type *type) { - memset(mpl->mpl_cpus, 0, sizeof(mpl->mpl_cpus)); - mpl->mpl_users = 0; - mpl->mpl_ticket = 1; + int i; + + memset(mpl, 0, sizeof(*mpl)); + mpl->mpl_initial_lock.lockval = 0; + mpl->mpl_global_lock = &mpl->mpl_initial_lock; + for (i = 0; i < nitems(mpl->mpl_cpus); i++) { + mpl->mpl_cpus[i].lock_storage.lockval = 1; + 
mpl->mpl_cpus[i].mylock = &mpl->mpl_cpus[i].lock_storage; + mpl->mpl_cpus[i].spinon = &mpl->mpl_cpus[i].lock_storage; + } #ifdef WITNESS mpl->mpl_lock_obj.lo_name = type->lt_name; @@ -105,7 +118,7 @@ ___mp_lock_init(struct __mp_lock *mpl, c } static __inline void -__mp_lock_spin(struct __mp_lock *mpl, u_int me) +__mp_lock_spin(struct __mp_lock *mpl, struct __mp_lock_cpu *lock) { struct schedstate_percpu *spc = &curcpu()->ci_schedstate; #ifdef MP_LOCKDEBUG @@ -113,7 +126,7 @@ __mp_lock_spin(struct __mp_lock *mpl, u_ #endif spc->spc_spinning++; - while (mpl->mpl_ticket != me) { + while (lock->spinon->lockval != 0) { CPU_BUSY_CYCLE(); #ifdef MP_LOCKDEBUG @@ -140,17 +153,27 @@ __mp_lock(struct __mp_lock *mpl) #endif s = intr_disable(); - if (cpu->mplc_depth++ == 0) - cpu->mplc_ticket = atomic_inc_int_nv(&mpl->mpl_users); + if (cpu->mplc_depth++ == 0) { + cpu->spinon = atomic_swap_ptr(&mpl->mpl_global_lock, + cpu->mylock); + } intr_restore(s); - __mp_lock_spin(mpl, cpu->mplc_ticket); + __mp_lock_spin(mpl, cpu); membar_enter_after_atomic(); WITNESS_LOCK(&mpl->mpl_lock_obj, LOP_EXCLUSIVE); } void +__mp_lock_do_release(struct __mp_lock_cpu *lock) +{ + lock->mylock->lockval = 0; + lock->mylock = lock->spinon; + lock->mylock->lockval = 1; /* ready for next time */ +} + +void __mp_unlock(struct __mp_lock *mpl) { struct __mp_lock_cpu *cpu = &mpl->mpl_cpus[cpu_number()]; @@ -168,7 +191,7 @@ __mp_unlock(struct __mp_lock *mpl) s = intr_disable(); if (--cpu->mplc_depth == 0) { membar_exit(); - mpl->mpl_ticket++; + __mp_lock_do_release(cpu); } intr_restore(s); } @@ -191,7 +214,7 @@ __mp_release_all(struct __mp_lock *mpl) #endif cpu->mplc_depth = 0; membar_exit(); - mpl->mpl_ticket++; + __mp_lock_do_release(cpu); intr_restore(s); return (rv); @@ -233,7 +256,7 @@ __mp_lock_held(struct __mp_lock *mpl, st { struct __mp_lock_cpu *cpu = &mpl->mpl_cpus[CPU_INFO_UNIT(ci)]; - return (cpu->mplc_ticket == mpl->mpl_ticket && cpu->mplc_depth > 0); + return (cpu->spinon->lockval == 0 && 
cpu->mplc_depth > 0); } #endif /* __USE_MI_MPLOCK */ Index: sys/mplock.h =================================================================== RCS file: /home/cvs/src/sys/sys/mplock.h,v retrieving revision 1.13 diff -u -p -r1.13 mplock.h --- sys/mplock.h 23 Apr 2019 13:35:12 -0000 1.13 +++ sys/mplock.h 25 Jun 2022 14:25:29 -0000 @@ -33,15 +33,21 @@ #include <sys/_lock.h> +typedef struct { + volatile u_int lockval; +} m_lock_t; + struct __mp_lock_cpu { - u_int mplc_ticket; - u_int mplc_depth; + m_lock_t lock_storage __aligned(64); + m_lock_t *mylock; + m_lock_t *spinon; + u_int mplc_depth; }; struct __mp_lock { struct __mp_lock_cpu mpl_cpus[MAXCPUS]; - volatile u_int mpl_ticket; - u_int mpl_users; + m_lock_t mpl_initial_lock; + m_lock_t *mpl_global_lock; #ifdef WITNESS struct lock_object mpl_lock_obj; #endif -- jca | PGP : 0x1524E7EE / 5135 92C1 AD36 5293 2BDF DDCC 0DFA 74AE 1524 E7EE
