The origin of this series: when Nick Piggin posted his first suggestion
for ticket spinlocks on LKML, I immediately liked the idea. For details
check LWN [1]; in a nutshell, the algorithm enforces strict FIFO
ordering for admission to contended spinlocks, thus improving
determinism on SMP systems with more than 2 CPUs.
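
For illustration, here is a minimal stand-alone sketch of the basic
idea using C11 atomics (names and types are made up, not part of this
patch): each acquirer draws a ticket from the tail counter and spins
until the head counter reaches it, so waiters are admitted strictly in
arrival order.

#include <stdatomic.h>

struct ticket_lock {
	atomic_uint tail;	/* next ticket to hand out */
	atomic_uint head;	/* ticket currently being served */
};

static void ticket_lock(struct ticket_lock *l)
{
	/* Drawing a ticket defines the admission order. */
	unsigned int ticket = atomic_fetch_add(&l->tail, 1);

	/* Spin until our ticket is served (cpu_relax() in kernel code). */
	while (atomic_load(&l->head) != ticket)
		;
}

static void ticket_unlock(struct ticket_lock *l)
{
	/* Admit the next waiter in FIFO order. */
	atomic_fetch_add(&l->head, 1);
}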

Meanwhile, ticket spinlocks are mainline (2.6.25). But that version has
two drawbacks for us: it doesn't support nesting like xnlock does, and
it is x86-only so far.

So I designed a version for Xenomai which is both nestable and
arch-independent. It is certainly not as optimized as the mainline
version, but our code paths stress the locking code differently anyway,
and nothing prevents us from introducing per-arch optimized variants
later on (if this lock implementation gains broader relevance).
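
To illustrate what nesting means here (a usage sketch only, relying on
the existing xnlock_get_irqsave/xnlock_put_irqrestore API): the lock
records its owner CPU, so a recursive acquisition from the same CPU
does not draw a second ticket and cannot deadlock on itself; only the
outermost put actually releases the lock.

	spl_t s;

	xnlock_get_irqsave(&nklock, s);	/* outer call takes a ticket */

	/*
	 * Code running here may call helpers which themselves do
	 * xnlock_get_irqsave(&nklock, s2). The inner get sees
	 * owner == current CPU and returns without spinning, and the
	 * matching inner put leaves the lock held.
	 */

	xnlock_put_irqrestore(&nklock, s);	/* outermost put releases */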

Survived heavy testing on a 4-way box (15, 20, 25, and 30 us latency
instances across the cores).
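
Implementation note on the encoding used below: the head (ticket
currently served) lives in the low byte of the fifo word, the tail
(next ticket to hand out) in the top byte, so a single
atomic_add_return() both draws a ticket and samples the current head.
The release path must advance the head without letting a carry from
0xFF spill into the unused middle bits, hence the masked addend in
xnlock_put(). A small stand-alone demo of that arithmetic (user-space,
not part of the patch):

#include <stdint.h>
#include <stdio.h>

#define HEAD_MASK	0x000000FFu
#define HEAD_INC	0x00000001u
#define TAIL_MASK	0xFF000000u
#define TICKET_MASK	(HEAD_MASK | TAIL_MASK)

int main(void)
{
	uint32_t fifo = 0x030000FFu;	/* tail = 3, head about to wrap */

	/* Same addend as xnlock_put(): confine the increment to the head byte. */
	uint32_t addend = ((fifo + HEAD_INC) & TICKET_MASK) - fifo;

	printf("fifo=%08x new=%08x\n",
	       (unsigned)fifo, (unsigned)(fifo + addend));
	/* prints: fifo=030000ff new=03000000, head wrapped, tail untouched */
	return 0;
}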

[1] http://lwn.net/Articles/267331

---
 include/asm-generic/bits/pod.h |    4 -
 include/asm-generic/system.h   |  122 +++++++++++++++++++++++++++++++++++++----
 ksrc/nucleus/Kconfig           |   12 ++++
 3 files changed, 126 insertions(+), 12 deletions(-)

Index: b/include/asm-generic/system.h
===================================================================
--- a/include/asm-generic/system.h
+++ b/include/asm-generic/system.h
@@ -104,7 +104,12 @@ typedef struct {
 
 typedef struct {
 
+#ifdef CONFIG_XENO_OPT_TICKET_LOCK
+	atomic_t fifo;
+	int owner;
+#else
 	atomic_t owner;
+#endif
 	const char *file;
 	const char *function;
 	unsigned line;
@@ -115,7 +120,7 @@ typedef struct {
 } xnlock_t;
 
 #define XNARCH_LOCK_UNLOCKED (xnlock_t) {	\
-	{ ~0 },					\
+	XNLOCK_INIT_VALUE,			\
 	NULL,					\
 	NULL,					\
 	0,					\
@@ -167,6 +172,8 @@ xnlock_dbg_acquired(xnlock_t *lock, int 
 	lock->cpu = cpu;
 }
 
+static inline int xnlock_owner(xnlock_t *lock);
+
 static inline int xnlock_dbg_release(xnlock_t *lock)
 {
 	extern xnlockinfo_t xnlock_stats[];
@@ -174,7 +181,7 @@ static inline int xnlock_dbg_release(xnl
 	int cpu = xnarch_current_cpu();
 	xnlockinfo_t *stats = &xnlock_stats[cpu];
 
-	if (unlikely(atomic_read(&lock->owner) != cpu)) {
+	if (unlikely(xnlock_owner(lock) != cpu)) {
 		rthal_emergency_console();
 		printk(KERN_ERR "Xenomai: unlocking unlocked nucleus lock %p"
 				" on CPU #%d\n"
@@ -199,9 +206,18 @@ static inline int xnlock_dbg_release(xnl
 
 #else /* !(CONFIG_SMP && XENO_DEBUG(NUCLEUS)) */
 
+#ifdef CONFIG_XENO_OPT_TICKET_LOCK
+typedef struct {
+
+	atomic_t fifo;
+	int owner;
+
+} xnlock_t;
+#else
 typedef struct { atomic_t owner; } xnlock_t;
+#endif
 
-#define XNARCH_LOCK_UNLOCKED		(xnlock_t) { { ~0 } }
+#define XNARCH_LOCK_UNLOCKED		(xnlock_t) { XNLOCK_INIT_VALUE }
 
 #define XNLOCK_DBG_CONTEXT
 #define XNLOCK_DBG_CONTEXT_ARGS
@@ -313,11 +329,6 @@ static inline int xnarch_setimask (int i
 #define xnlock_clear_irqoff(lock)	xnlock_put_irqrestore(lock, 1)
 #define xnlock_clear_irqon(lock)	xnlock_put_irqrestore(lock, 0)
 
-static inline void xnlock_init (xnlock_t *lock)
-{
-	*lock = XNARCH_LOCK_UNLOCKED;
-}
-
 #define DECLARE_XNLOCK(lock)		xnlock_t lock
 #define DECLARE_EXTERN_XNLOCK(lock)	extern xnlock_t lock
 #define DEFINE_XNLOCK(lock)		xnlock_t lock = XNARCH_LOCK_UNLOCKED
@@ -325,12 +336,97 @@ static inline void xnlock_init (xnlock_t
 
 void __xnlock_spin(xnlock_t *lock /*, */ XNLOCK_DBG_CONTEXT_ARGS);
 
+#ifdef CONFIG_XENO_OPT_TICKET_LOCK
+/* Note: We assume atomic_t is at least 32-bit wide. */
+#define XNLOCK_HEAD_MASK		0x000000FF
+#define XNLOCK_HEAD_INC			0x00000001
+#define XNLOCK_TAIL_MASK		0xFF000000
+#define XNLOCK_TAIL_SHIFT		24
+#define XNLOCK_TAIL_INC			0x01000000
+#define XNLOCK_TICKET_MASK		(XNLOCK_HEAD_MASK | XNLOCK_TAIL_MASK)
+#define XNLOCK_INIT_VALUE		{ 0x00000001 }, ~0
+
+static inline int xnlock_owner(xnlock_t *lock)
+{
+	return lock->owner;
+}
+
+static inline int __xnlock_get(xnlock_t *lock /*, */ XNLOCK_DBG_CONTEXT_ARGS)
+{
+	unsigned long long start;
+	int cpu = xnarch_current_cpu();
+	u32 fifo;
+	u8 ticket;
+
+	if (xnlock_owner(lock) == cpu)
+		return 1;
+
+	xnlock_dbg_prepare_acquire(&start);
+
+	fifo = atomic_add_return(XNLOCK_TAIL_INC, &lock->fifo);
+	ticket = fifo >> XNLOCK_TAIL_SHIFT;
+
+	/* First check if we are already the owner (head == tail). */
+	if (unlikely((u8)fifo != ticket)) {
+	 	unsigned int spin_limit;
+
+		xnlock_dbg_prepare_spin(&spin_limit);
+
+		/*
+		 * Spin until head equals our ticket. In this case,
+		 * moving out-of-line does not pay off.
+		 */
+		do {
+			cpu_relax();
+			xnlock_dbg_spinning(lock, cpu, &spin_limit /*, */
+					    XNLOCK_DBG_PASS_CONTEXT);
+		} while ((u8)atomic_read(&lock->fifo) != ticket);
+	}
+	lock->owner = cpu;
+
+	xnlock_dbg_acquired(lock, cpu, &start /*, */ XNLOCK_DBG_PASS_CONTEXT);
+
+	return 0;
+}
+
+static inline void xnlock_put(xnlock_t *lock)
+{
+	u32 fifo = atomic_read(&lock->fifo);
+
+	if (xnlock_dbg_release(lock))
+		return;
+
+	lock->owner = ~0;
+
+	/*
+	 * Make sure all data written inside the lock is visible to
+	 * other CPUs before we release the lock.
+	 */
+	xnarch_memory_barrier();
+
+	/*
+	 * Carefully calculate the addend so that we confine the
+	 * XNLOCK_HEAD_INC just to the first byte of fifo.
+	 */
+	atomic_add(((fifo + XNLOCK_HEAD_INC) & XNLOCK_TICKET_MASK) - fifo,
+		   &lock->fifo);
+}
+
+#else /* ! CONFIG_XENO_OPT_TICKET_LOCK */
+
+#define XNLOCK_INIT_VALUE		{ ~0 }
+
+static inline int xnlock_owner(xnlock_t *lock)
+{
+	return atomic_read(&lock->owner);
+}
+
 static inline int __xnlock_get(xnlock_t *lock /*, */ XNLOCK_DBG_CONTEXT_ARGS)
 {
 	unsigned long long start;
 	int cpu = xnarch_current_cpu();
 
-	if (atomic_read(&lock->owner) == cpu)
+	if (xnlock_owner(lock) == cpu)
 		return 1;
 
 	xnlock_dbg_prepare_acquire(&start);
@@ -356,6 +452,12 @@ static inline void xnlock_put(xnlock_t *
 
 	atomic_set(&lock->owner, ~0);
 }
+#endif /* ! CONFIG_XENO_OPT_TICKET_LOCK */
+
+static inline void xnlock_init(xnlock_t *lock)
+{
+	*lock = XNARCH_LOCK_UNLOCKED;
+}
 
 static inline spl_t
 __xnlock_get_irqsave(xnlock_t *lock /*, */ XNLOCK_DBG_CONTEXT_ARGS)
@@ -386,7 +488,7 @@ static inline int xnarch_send_ipi(xnarch
 
 static inline int xnlock_is_owner(xnlock_t *lock)
 {
-	return atomic_read(&lock->owner) == xnarch_current_cpu();
+	return xnlock_owner(lock) == xnarch_current_cpu();
 }
 
 #else /* !CONFIG_SMP */
Index: b/ksrc/nucleus/Kconfig
===================================================================
--- a/ksrc/nucleus/Kconfig
+++ b/ksrc/nucleus/Kconfig
@@ -340,6 +340,18 @@ config XENO_OPT_TIMER_WHEEL_STEP
 	Set the duration in ns of a timer wheel step. At each step, 
 	the timer wheel use the next hash bucket.
 
+config XENO_OPT_TICKET_LOCK
+	bool "FIFO ticket spinlocks"
+	depends on SMP
+	help
+
+	Uses a ticket spinlock algorithm which enforces strict FIFO
+	admission order on contended locks, improving the determinism
+	of lock acquisitions on large SMP systems. The ticket spinlock
+	has a higher overhead than the unordered algorithm and should
+	therefore only be used on systems with more than two CPUs,
+	i.e. when there can be more than one waiter in the worst case.
+
 endmenu
 
 endif
Index: b/include/asm-generic/bits/pod.h
===================================================================
--- a/include/asm-generic/bits/pod.h
+++ b/include/asm-generic/bits/pod.h
@@ -295,7 +295,7 @@ unsigned long long xnarch_get_cpu_time(v
 
 EXPORT_SYMBOL(xnarch_get_cpu_time);
 
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && !defined(CONFIG_XENO_OPT_TICKET_LOCK)
 void __xnlock_spin(xnlock_t *lock /*, */ XNLOCK_DBG_CONTEXT_ARGS)
 {
 	unsigned int spin_limit;
@@ -311,6 +311,6 @@ void __xnlock_spin(xnlock_t *lock /*, */
 		} while(atomic_read(&lock->owner) != ~0);
 }
 EXPORT_SYMBOL(__xnlock_spin);
-#endif /* CONFIG_SMP */
+#endif /* CONFIG_SMP && !CONFIG_XENO_OPT_TICKET_LOCK */
 
 #endif /* !_XENO_ASM_GENERIC_BITS_POD_H */
