Re: [PATCH v3 4/6] powerpc/64s: implement queued spinlocks and rwlocks
On Mon, Jul 06, 2020 at 02:35:38PM +1000, Nicholas Piggin wrote: > These have shown significantly improved performance and fairness when > spinlock contention is moderate to high on very large systems. > > [ Numbers hopefully forthcoming after more testing, but initial >results look good ] > > Thanks to the fast path, single threaded performance is not noticably > hurt. > > Signed-off-by: Nicholas Piggin > --- > arch/powerpc/Kconfig | 13 > arch/powerpc/include/asm/Kbuild | 2 ++ > arch/powerpc/include/asm/qspinlock.h | 25 +++ > arch/powerpc/include/asm/spinlock.h | 5 + > arch/powerpc/include/asm/spinlock_types.h | 5 + > arch/powerpc/lib/Makefile | 3 +++ > include/asm-generic/qspinlock.h | 2 ++ > 7 files changed, 55 insertions(+) > create mode 100644 arch/powerpc/include/asm/qspinlock.h > > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig > index 24ac85c868db..17663ea57697 100644 > --- a/arch/powerpc/Kconfig > +++ b/arch/powerpc/Kconfig > @@ -146,6 +146,8 @@ config PPC > select ARCH_SUPPORTS_ATOMIC_RMW > select ARCH_USE_BUILTIN_BSWAP > select ARCH_USE_CMPXCHG_LOCKREF if PPC64 > + select ARCH_USE_QUEUED_RWLOCKS if PPC_QUEUED_SPINLOCKS > + select ARCH_USE_QUEUED_SPINLOCKSif PPC_QUEUED_SPINLOCKS > select ARCH_WANT_IPC_PARSE_VERSION > select ARCH_WEAK_RELEASE_ACQUIRE > select BINFMT_ELF > @@ -492,6 +494,17 @@ config HOTPLUG_CPU > > Say N if you are unsure. > > +config PPC_QUEUED_SPINLOCKS > + bool "Queued spinlocks" > + depends on SMP > + default "y" if PPC_BOOK3S_64 > + help > + Say Y here to use to use queued spinlocks which are more complex > + but give better salability and fairness on large SMP and NUMA ^ +c? Thanks Michal > + systems. > + > + If unsure, say "Y" if you have lots of cores, otherwise "N". 
> + > config ARCH_CPU_PROBE_RELEASE > def_bool y > depends on HOTPLUG_CPU > diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild > index dadbcf3a0b1e..1dd8b6adff5e 100644 > --- a/arch/powerpc/include/asm/Kbuild > +++ b/arch/powerpc/include/asm/Kbuild > @@ -6,5 +6,7 @@ generated-y += syscall_table_spu.h > generic-y += export.h > generic-y += local64.h > generic-y += mcs_spinlock.h > +generic-y += qrwlock.h > +generic-y += qspinlock.h > generic-y += vtime.h > generic-y += early_ioremap.h > diff --git a/arch/powerpc/include/asm/qspinlock.h > b/arch/powerpc/include/asm/qspinlock.h > new file mode 100644 > index ..c49e33e24edd > --- /dev/null > +++ b/arch/powerpc/include/asm/qspinlock.h > @@ -0,0 +1,25 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +#ifndef _ASM_POWERPC_QSPINLOCK_H > +#define _ASM_POWERPC_QSPINLOCK_H > + > +#include > + > +#define _Q_PENDING_LOOPS (1 << 9) /* not tuned */ > + > +#define smp_mb__after_spinlock() smp_mb() > + > +static __always_inline int queued_spin_is_locked(struct qspinlock *lock) > +{ > + /* > + * This barrier was added to simple spinlocks by commit 51d7d5205d338, > + * but it should now be possible to remove it, asm arm64 has done with > + * commit c6f5d02b6a0f. 
> + */ > + smp_mb(); > + return atomic_read(&lock->val); > +} > +#define queued_spin_is_locked queued_spin_is_locked > + > +#include <asm-generic/qspinlock.h> > + > +#endif /* _ASM_POWERPC_QSPINLOCK_H */ > diff --git a/arch/powerpc/include/asm/spinlock.h > b/arch/powerpc/include/asm/spinlock.h > index 21357fe05fe0..434615f1d761 100644 > --- a/arch/powerpc/include/asm/spinlock.h > +++ b/arch/powerpc/include/asm/spinlock.h > @@ -3,7 +3,12 @@ > #define __ASM_SPINLOCK_H > #ifdef __KERNEL__ > > +#ifdef CONFIG_PPC_QUEUED_SPINLOCKS > +#include <asm/qspinlock.h> > +#include <asm/qrwlock.h> > +#else > #include <asm/simple_spinlock.h> > +#endif > > #endif /* __KERNEL__ */ > #endif /* __ASM_SPINLOCK_H */ > diff --git a/arch/powerpc/include/asm/spinlock_types.h > b/arch/powerpc/include/asm/spinlock_types.h > index 3906f52dae65..c5d742f18021 100644 > --- a/arch/powerpc/include/asm/spinlock_types.h > +++ b/arch/powerpc/include/asm/spinlock_types.h > @@ -6,6 +6,11 @@ > # error "please don't include this file directly" > #endif > > +#ifdef CONFIG_PPC_QUEUED_SPINLOCKS > +#include <asm-generic/qspinlock_types.h> > +#include <asm-generic/qrwlock_types.h> > +#else > #include <asm/simple_spinlock_types.h> > +#endif > > #endif > diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile > index 5e994cda8e40..d66a645503eb 100644 > --- a/arch/powerpc/lib/Makefile > +++ b/arch/powerpc/lib/Makefile > @@ -41,7 +41,10 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o > copypage_power7.o \ > obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \ > memcpy_64.o memcpy_mcsafe_64.o > > +ifndef CONFIG_PPC_QUEUED_SPINLOCKS > obj64-$(CONFIG_SMP) += locks.o > +endif > + > obj64-$(CONFIG_ALTIVEC) += vmx-helper.o > obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o \ >
Re: [PATCH v3 4/6] powerpc/64s: implement queued spinlocks and rwlocks
On Thu, Jul 09, 2020 at 08:20:25PM +1000, Michael Ellerman wrote: > Nicholas Piggin writes: > > These have shown significantly improved performance and fairness when > > spinlock contention is moderate to high on very large systems. > > > > [ Numbers hopefully forthcoming after more testing, but initial > >results look good ] > > Would be good to have something here, even if it's preliminary. > > > Thanks to the fast path, single threaded performance is not noticably > > hurt. > > > > Signed-off-by: Nicholas Piggin > > --- > > arch/powerpc/Kconfig | 13 > > arch/powerpc/include/asm/Kbuild | 2 ++ > > arch/powerpc/include/asm/qspinlock.h | 25 +++ > > arch/powerpc/include/asm/spinlock.h | 5 + > > arch/powerpc/include/asm/spinlock_types.h | 5 + > > arch/powerpc/lib/Makefile | 3 +++ > > > include/asm-generic/qspinlock.h | 2 ++ > > Who's ack do we need for that part? Mine, I suppose, would do. As discussed earlier, it probably isn't required anymore, but I understand the paranoia of not wanting to change too many things at once :-) Acked-by: Peter Zijlstra (Intel)
Re: [PATCH v3 4/6] powerpc/64s: implement queued spinlocks and rwlocks
Nicholas Piggin writes: > These have shown significantly improved performance and fairness when > spinlock contention is moderate to high on very large systems. > > [ Numbers hopefully forthcoming after more testing, but initial >results look good ] Would be good to have something here, even if it's preliminary. > Thanks to the fast path, single threaded performance is not noticably > hurt. > > Signed-off-by: Nicholas Piggin > --- > arch/powerpc/Kconfig | 13 > arch/powerpc/include/asm/Kbuild | 2 ++ > arch/powerpc/include/asm/qspinlock.h | 25 +++ > arch/powerpc/include/asm/spinlock.h | 5 + > arch/powerpc/include/asm/spinlock_types.h | 5 + > arch/powerpc/lib/Makefile | 3 +++ > include/asm-generic/qspinlock.h | 2 ++ Whose ack do we need for that part? > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig > index 24ac85c868db..17663ea57697 100644 > --- a/arch/powerpc/Kconfig > +++ b/arch/powerpc/Kconfig > @@ -492,6 +494,17 @@ config HOTPLUG_CPU > > Say N if you are unsure. > > +config PPC_QUEUED_SPINLOCKS > + bool "Queued spinlocks" > + depends on SMP > + default "y" if PPC_BOOK3S_64 Not sure about default y, at least until we've got a better idea of the perf impact on a range of small/big new/old systems. > + help > + Say Y here to use to use queued spinlocks which are more complex > + but give better salability and fairness on large SMP and NUMA > + systems. > + > + If unsure, say "Y" if you have lots of cores, otherwise "N". Would be nice if we could give a range for "lots". > diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild > index dadbcf3a0b1e..1dd8b6adff5e 100644 > --- a/arch/powerpc/include/asm/Kbuild > +++ b/arch/powerpc/include/asm/Kbuild > @@ -6,5 +6,7 @@ generated-y += syscall_table_spu.h > generic-y += export.h > generic-y += local64.h > generic-y += mcs_spinlock.h > +generic-y += qrwlock.h > +generic-y += qspinlock.h The 2nd line spits a warning about a redundant entry. I think you want to just drop it. cheers
[PATCH v3 4/6] powerpc/64s: implement queued spinlocks and rwlocks
These have shown significantly improved performance and fairness when spinlock contention is moderate to high on very large systems. [ Numbers hopefully forthcoming after more testing, but initial results look good ] Thanks to the fast path, single threaded performance is not noticably hurt. Signed-off-by: Nicholas Piggin --- arch/powerpc/Kconfig | 13 arch/powerpc/include/asm/Kbuild | 2 ++ arch/powerpc/include/asm/qspinlock.h | 25 +++ arch/powerpc/include/asm/spinlock.h | 5 + arch/powerpc/include/asm/spinlock_types.h | 5 + arch/powerpc/lib/Makefile | 3 +++ include/asm-generic/qspinlock.h | 2 ++ 7 files changed, 55 insertions(+) create mode 100644 arch/powerpc/include/asm/qspinlock.h diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 24ac85c868db..17663ea57697 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -146,6 +146,8 @@ config PPC select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF if PPC64 + select ARCH_USE_QUEUED_RWLOCKS if PPC_QUEUED_SPINLOCKS + select ARCH_USE_QUEUED_SPINLOCKSif PPC_QUEUED_SPINLOCKS select ARCH_WANT_IPC_PARSE_VERSION select ARCH_WEAK_RELEASE_ACQUIRE select BINFMT_ELF @@ -492,6 +494,17 @@ config HOTPLUG_CPU Say N if you are unsure. +config PPC_QUEUED_SPINLOCKS + bool "Queued spinlocks" + depends on SMP + default "y" if PPC_BOOK3S_64 + help + Say Y here to use to use queued spinlocks which are more complex + but give better salability and fairness on large SMP and NUMA + systems. + + If unsure, say "Y" if you have lots of cores, otherwise "N". 
+ config ARCH_CPU_PROBE_RELEASE def_bool y depends on HOTPLUG_CPU diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index dadbcf3a0b1e..1dd8b6adff5e 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -6,5 +6,7 @@ generated-y += syscall_table_spu.h generic-y += export.h generic-y += local64.h generic-y += mcs_spinlock.h +generic-y += qrwlock.h +generic-y += qspinlock.h generic-y += vtime.h generic-y += early_ioremap.h diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h new file mode 100644 index 000000000000..c49e33e24edd --- /dev/null +++ b/arch/powerpc/include/asm/qspinlock.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_QSPINLOCK_H +#define _ASM_POWERPC_QSPINLOCK_H + +#include <asm-generic/qspinlock_types.h> + +#define _Q_PENDING_LOOPS (1 << 9) /* not tuned */ + +#define smp_mb__after_spinlock() smp_mb() + +static __always_inline int queued_spin_is_locked(struct qspinlock *lock) +{ + /* + * This barrier was added to simple spinlocks by commit 51d7d5205d338, + * but it should now be possible to remove it, as arm64 has done with + * commit c6f5d02b6a0f. 
+ */ + smp_mb(); + return atomic_read(&lock->val); +} +#define queued_spin_is_locked queued_spin_is_locked + +#include <asm-generic/qspinlock.h> + +#endif /* _ASM_POWERPC_QSPINLOCK_H */ diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index 21357fe05fe0..434615f1d761 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -3,7 +3,12 @@ #define __ASM_SPINLOCK_H #ifdef __KERNEL__ +#ifdef CONFIG_PPC_QUEUED_SPINLOCKS +#include <asm/qspinlock.h> +#include <asm/qrwlock.h> +#else #include <asm/simple_spinlock.h> +#endif #endif /* __KERNEL__ */ #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h index 3906f52dae65..c5d742f18021 100644 --- a/arch/powerpc/include/asm/spinlock_types.h +++ b/arch/powerpc/include/asm/spinlock_types.h @@ -6,6 +6,11 @@ # error "please don't include this file directly" #endif +#ifdef CONFIG_PPC_QUEUED_SPINLOCKS +#include <asm-generic/qspinlock_types.h> +#include <asm-generic/qrwlock_types.h> +#else #include <asm/simple_spinlock_types.h> +#endif #endif diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 5e994cda8e40..d66a645503eb 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -41,7 +41,10 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \ obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \ memcpy_64.o memcpy_mcsafe_64.o +ifndef CONFIG_PPC_QUEUED_SPINLOCKS obj64-$(CONFIG_SMP) += locks.o +endif + obj64-$(CONFIG_ALTIVEC) += vmx-helper.o obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o \ test_emulate_step_exec_instr.o diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h index fde943d180e0..fb0a814d4395 100644 --- a/include/asm-generic/qspinlock.h +++ b/include/asm-generic/qspinlock.h @@ -12,6 +12,7 @@ #include +#ifndef queued_spin_is_locked /** *