Extend the contended_release tracepoint to queued spinlocks and queued rwlocks.
When the tracepoint is disabled, the only addition to the hot path is a single NOP instruction (the static branch). When enabled, the contention check, trace call, and unlock are combined in an out-of-line function to minimize hot path impact, so that the compiler does not need to preserve the lock pointer in a callee-saved register across the trace call. Binary size impact (x86_64, defconfig): uninlined unlock (common case): +983 bytes (+0.00%); inlined unlock (worst case): +71554 bytes (+0.30%). The inlined unlock case could not be achieved through Kconfig options because PREEMPT_BUILD unconditionally selects UNINLINE_SPIN_UNLOCK on x86_64. The UNINLINE_SPIN_UNLOCK guards were manually inverted to force-inline the unlock path and estimate the worst-case binary size increase. Signed-off-by: Dmitry Ilvokhin <[email protected]> --- include/asm-generic/qrwlock.h | 48 +++++++++++++++++++++++++++------ include/asm-generic/qspinlock.h | 25 +++++++++++++++-- kernel/locking/qrwlock.c | 16 +++++++++++ kernel/locking/qspinlock.c | 8 ++++++ 4 files changed, 87 insertions(+), 10 deletions(-) diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h index 75b8f4601b28..e24dc537fd66 100644 --- a/include/asm-generic/qrwlock.h +++ b/include/asm-generic/qrwlock.h @@ -14,6 +14,7 @@ #define __ASM_GENERIC_QRWLOCK_H #include <linux/atomic.h> +#include <linux/tracepoint-defs.h> #include <asm/barrier.h> #include <asm/processor.h> @@ -35,6 +36,10 @@ */ extern void queued_read_lock_slowpath(struct qrwlock *lock); extern void queued_write_lock_slowpath(struct qrwlock *lock); +extern void queued_read_unlock_traced(struct qrwlock *lock); +extern void queued_write_unlock_traced(struct qrwlock *lock); + +DECLARE_TRACEPOINT(contended_release); /** * queued_read_trylock - try to acquire read lock of a queued rwlock @@ -102,10 +107,16 @@ static inline void queued_write_lock(struct qrwlock *lock) } /** - * queued_read_unlock - release read lock of a queued rwlock + * 
queued_rwlock_is_contended - check if the lock is contended * @lock : Pointer to queued rwlock structure + * Return: 1 if lock contended, 0 otherwise */ -static inline void queued_read_unlock(struct qrwlock *lock) +static inline int queued_rwlock_is_contended(struct qrwlock *lock) +{ + return arch_spin_is_locked(&lock->wait_lock); +} + +static __always_inline void __queued_read_unlock(struct qrwlock *lock) { /* * Atomically decrement the reader count @@ -114,22 +125,43 @@ static inline void queued_read_unlock(struct qrwlock *lock) } /** - * queued_write_unlock - release write lock of a queued rwlock + * queued_read_unlock - release read lock of a queued rwlock * @lock : Pointer to queued rwlock structure */ -static inline void queued_write_unlock(struct qrwlock *lock) +static inline void queued_read_unlock(struct qrwlock *lock) +{ + /* + * Trace and unlock are combined in the traced unlock variant so + * the compiler does not need to preserve the lock pointer across + * the function call, avoiding callee-saved register save/restore + * on the hot path. + */ + if (tracepoint_enabled(contended_release)) { + queued_read_unlock_traced(lock); + return; + } + + __queued_read_unlock(lock); +} + +static __always_inline void __queued_write_unlock(struct qrwlock *lock) { smp_store_release(&lock->wlocked, 0); } /** - * queued_rwlock_is_contended - check if the lock is contended + * queued_write_unlock - release write lock of a queued rwlock * @lock : Pointer to queued rwlock structure - * Return: 1 if lock contended, 0 otherwise */ -static inline int queued_rwlock_is_contended(struct qrwlock *lock) +static inline void queued_write_unlock(struct qrwlock *lock) { - return arch_spin_is_locked(&lock->wait_lock); + /* See comment in queued_read_unlock(). 
*/ + if (tracepoint_enabled(contended_release)) { + queued_write_unlock_traced(lock); + return; + } + + __queued_write_unlock(lock); } /* diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h index bf47cca2c375..8ba463a3b891 100644 --- a/include/asm-generic/qspinlock.h +++ b/include/asm-generic/qspinlock.h @@ -41,6 +41,7 @@ #include <asm-generic/qspinlock_types.h> #include <linux/atomic.h> +#include <linux/tracepoint-defs.h> #ifndef queued_spin_is_locked /** @@ -116,6 +117,19 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock) #endif #ifndef queued_spin_unlock + +DECLARE_TRACEPOINT(contended_release); + +extern void queued_spin_unlock_traced(struct qspinlock *lock); + +static __always_inline void __queued_spin_unlock(struct qspinlock *lock) +{ + /* + * unlock() needs release semantics: + */ + smp_store_release(&lock->locked, 0); +} + /** * queued_spin_unlock - release a queued spinlock * @lock : Pointer to queued spinlock structure @@ -123,9 +137,16 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock) static __always_inline void queued_spin_unlock(struct qspinlock *lock) { /* - * unlock() needs release semantics: + * Trace and unlock are combined in queued_spin_unlock_traced() + * so the compiler does not need to preserve the lock pointer + * across the function call, avoiding callee-saved register + * save/restore on the hot path. 
*/ - smp_store_release(&lock->locked, 0); + if (tracepoint_enabled(contended_release)) { + queued_spin_unlock_traced(lock); + return; + } + __queued_spin_unlock(lock); } #endif diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c index d2ef312a8611..5f7a0fc2b27a 100644 --- a/kernel/locking/qrwlock.c +++ b/kernel/locking/qrwlock.c @@ -90,3 +90,19 @@ void __lockfunc queued_write_lock_slowpath(struct qrwlock *lock) trace_contention_end(lock, 0); } EXPORT_SYMBOL(queued_write_lock_slowpath); + +void __lockfunc queued_read_unlock_traced(struct qrwlock *lock) +{ + if (queued_rwlock_is_contended(lock)) + trace_contended_release(lock); + __queued_read_unlock(lock); +} +EXPORT_SYMBOL(queued_read_unlock_traced); + +void __lockfunc queued_write_unlock_traced(struct qrwlock *lock) +{ + if (queued_rwlock_is_contended(lock)) + trace_contended_release(lock); + __queued_write_unlock(lock); +} +EXPORT_SYMBOL(queued_write_unlock_traced); diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index af8d122bb649..1544dcec65fa 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -104,6 +104,14 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock, #define queued_spin_lock_slowpath native_queued_spin_lock_slowpath #endif +void __lockfunc queued_spin_unlock_traced(struct qspinlock *lock) +{ + if (queued_spin_is_contended(lock)) + trace_contended_release(lock); + __queued_spin_unlock(lock); +} +EXPORT_SYMBOL(queued_spin_unlock_traced); + #endif /* _GEN_PV_LOCK_SLOWPATH */ /** -- 2.52.0
