Re: [PATCH 10/11] list_bl: don't use bit locks for PREEMPT_RT or lockdep

2023-12-06 Thread Dave Chinner
On Wed, Dec 06, 2023 at 11:16:50PM -0500, Kent Overstreet wrote:
> On Wed, Dec 06, 2023 at 05:05:39PM +1100, Dave Chinner wrote:
> > From: Dave Chinner 
> > 
> > hash-bl nests spinlocks inside the bit locks. This causes problems
> > for CONFIG_PREEMPT_RT which converts spin locks to sleeping locks,
> > and we're not allowed to sleep while holding a spinning lock.
> > 
> > Further, lockdep does not support bit locks, so we lose lockdep
> > coverage of the inode hash table with the hash-bl conversion.
> > 
> > To enable these configs to work, add an external per-chain spinlock
> > to the hlist_bl_head() and add helpers to use this instead of the
> > bit spinlock when preempt_rt or lockdep are enabled.
> > 
> > This converts all users of hlist-bl to use the external spinlock in
> > these situations, so we also gain lockdep coverage of things like
> > the dentry cache hash table with this change.
> > 
> > Signed-off-by: Dave Chinner 
> 
> Sleepable bit locks can be done with wait_on_bit(), is that worth
> considering for PREEMPT_RT? Or are the other features of real locks
> important there?

I think wait_on_bit() is not scalable. It hashes down to one of 256
shared struct wait_queue_heads which have thundering herd
behaviours, and it requires the locker to always run
prepare_to_wait() and finish_wait(). This means there is at least
one spinlock_irqsave()/unlock pair needed, sometimes two, just to
get an uncontended sleeping bit lock.

So as a fast path operation that requires lock scalability, it's
going to be better to use a straight spinlock that doesn't require
irq safety as it's far less expensive than a sleeping bit lock.

Whether CONFIG_PREEMPT_RT changes that equation at all is not at all
clear to me, and so I'll leave that consideration to RT people if
they see a need to address it. In the mean time, we need to use an
external spinlock for lockdep validation so it really doesn't make
any sense at all to add a third locking variant with completely
different semantics just for PREEMPT_RT...

-Dave.
-- 
Dave Chinner
da...@fromorbit.com



Re: [PATCH 10/11] list_bl: don't use bit locks for PREEMPT_RT or lockdep

2023-12-06 Thread Kent Overstreet
On Wed, Dec 06, 2023 at 05:05:39PM +1100, Dave Chinner wrote:
> From: Dave Chinner 
> 
> hash-bl nests spinlocks inside the bit locks. This causes problems
> for CONFIG_PREEMPT_RT which converts spin locks to sleeping locks,
> and we're not allowed to sleep while holding a spinning lock.
> 
> Further, lockdep does not support bit locks, so we lose lockdep
> coverage of the inode hash table with the hash-bl conversion.
> 
> To enable these configs to work, add an external per-chain spinlock
> to the hlist_bl_head() and add helpers to use this instead of the
> bit spinlock when preempt_rt or lockdep are enabled.
> 
> This converts all users of hlist-bl to use the external spinlock in
> these situations, so we also gain lockdep coverage of things like
> the dentry cache hash table with this change.
> 
> Signed-off-by: Dave Chinner 

Sleepable bit locks can be done with wait_on_bit(), is that worth
considering for PREEMPT_RT? Or are the other features of real locks
important there?

(not a request for the current patchset, just perhaps a note for future
work)

Reviewed-by: Kent Overstreet 

> ---
>  include/linux/list_bl.h| 126 -
>  include/linux/rculist_bl.h |  13 
>  2 files changed, 110 insertions(+), 29 deletions(-)
> 
> diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h
> index 8ee2bf5af131..990ad8e24e0b 100644
> --- a/include/linux/list_bl.h
> +++ b/include/linux/list_bl.h
> @@ -4,14 +4,27 @@
>  
>  #include 
>  #include 
> +#include 
>  
>  /*
>   * Special version of lists, where head of the list has a lock in the lowest
>   * bit. This is useful for scalable hash tables without increasing memory
>   * footprint overhead.
>   *
> - * For modification operations, the 0 bit of hlist_bl_head->first
> - * pointer must be set.
> + * Whilst the general use of bit spin locking is considered safe, PREEMPT_RT
> + * introduces a problem with nesting spin locks inside bit locks: spin locks
> + * become sleeping locks, and we can't sleep inside spinning locks such as 
> bit
> + * locks. However, for RTPREEMPT, performance is less of an issue than
> + * correctness, so we trade off the memory and cache footprint of a spinlock 
> per
> + * list so the list locks are converted to sleeping locks and work correctly
> + * with PREEMPT_RT kernels.
> + *
> + * An added advantage of this is that we can use the same trick when lockdep 
> is
> + * enabled (again, performance doesn't matter) and gain lockdep coverage of 
> all
> + * the hash-bl operations.
> + *
> + * For modification operations when using pure bit locking, the 0 bit of
> + * hlist_bl_head->first pointer must be set.
>   *
>   * With some small modifications, this can easily be adapted to store several
>   * arbitrary bits (not just a single lock bit), if the need arises to store
> @@ -30,16 +43,21 @@
>  #define LIST_BL_BUG_ON(x)
>  #endif
>  
> +#undef LIST_BL_USE_SPINLOCKS
> +#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_LOCKDEP)
> +#define LIST_BL_USE_SPINLOCKS1
> +#endif
>  
>  struct hlist_bl_head {
>   struct hlist_bl_node *first;
> +#ifdef LIST_BL_USE_SPINLOCKS
> + spinlock_t lock;
> +#endif
>  };
>  
>  struct hlist_bl_node {
>   struct hlist_bl_node *next, **pprev;
>  };
> -#define INIT_HLIST_BL_HEAD(ptr) \
> - ((ptr)->first = NULL)
>  
>  static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h)
>  {
> @@ -54,6 +72,69 @@ static inline bool  hlist_bl_unhashed(const struct 
> hlist_bl_node *h)
>   return !h->pprev;
>  }
>  
> +#ifdef LIST_BL_USE_SPINLOCKS
> +#define INIT_HLIST_BL_HEAD(ptr) do { \
> + (ptr)->first = NULL; \
> + spin_lock_init(&(ptr)->lock); \
> +} while (0)
> +
> +static inline void hlist_bl_lock(struct hlist_bl_head *b)
> +{
> + spin_lock(>lock);
> +}
> +
> +static inline void hlist_bl_unlock(struct hlist_bl_head *b)
> +{
> + spin_unlock(>lock);
> +}
> +
> +static inline bool hlist_bl_is_locked(struct hlist_bl_head *b)
> +{
> + return spin_is_locked(>lock);
> +}
> +
> +static inline struct hlist_bl_node *hlist_bl_first(struct hlist_bl_head *h)
> +{
> + return h->first;
> +}
> +
> +static inline void hlist_bl_set_first(struct hlist_bl_head *h,
> + struct hlist_bl_node *n)
> +{
> + h->first = n;
> +}
> +
> +static inline void hlist_bl_set_before(struct hlist_bl_node **pprev,
> + struct hlist_bl_node *n)
> +{
> + WRITE_ONCE(*pprev, n);
> +}
> +
> +static inline bool hlist_bl_empty(const struct hlist_bl_head *h)
> +{
> + return !READ_ONCE(h->first);
> +}
> +
> +#else /* !LIST_BL_USE_SPINLOCKS */
> +
> +#define INIT_HLIST_BL_HEAD(ptr) \
> + ((ptr)->first = NULL)
> +
> +static inline void hlist_bl_lock(struct hlist_bl_head *b)
> +{
> + bit_spin_lock(0, (unsigned long *)b);
> +}
> +
> +static inline void hlist_bl_unlock(struct hlist_bl_head *b)
> +{
> + __bit_spin_unlock(0, (unsigned long *)b);
> +}
> +
> +static inline bool 

[PATCH 10/11] list_bl: don't use bit locks for PREEMPT_RT or lockdep

2023-12-05 Thread Dave Chinner
From: Dave Chinner 

hash-bl nests spinlocks inside the bit locks. This causes problems
for CONFIG_PREEMPT_RT which converts spin locks to sleeping locks,
and we're not allowed to sleep while holding a spinning lock.

Further, lockdep does not support bit locks, so we lose lockdep
coverage of the inode hash table with the hash-bl conversion.

To enable these configs to work, add an external per-chain spinlock
to the hlist_bl_head() and add helpers to use this instead of the
bit spinlock when preempt_rt or lockdep are enabled.

This converts all users of hlist-bl to use the external spinlock in
these situations, so we also gain lockdep coverage of things like
the dentry cache hash table with this change.

Signed-off-by: Dave Chinner 
---
 include/linux/list_bl.h| 126 -
 include/linux/rculist_bl.h |  13 
 2 files changed, 110 insertions(+), 29 deletions(-)

diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h
index 8ee2bf5af131..990ad8e24e0b 100644
--- a/include/linux/list_bl.h
+++ b/include/linux/list_bl.h
@@ -4,14 +4,27 @@
 
 #include 
 #include 
+#include 
 
 /*
  * Special version of lists, where head of the list has a lock in the lowest
  * bit. This is useful for scalable hash tables without increasing memory
  * footprint overhead.
  *
- * For modification operations, the 0 bit of hlist_bl_head->first
- * pointer must be set.
+ * Whilst the general use of bit spin locking is considered safe, PREEMPT_RT
+ * introduces a problem with nesting spin locks inside bit locks: spin locks
+ * become sleeping locks, and we can't sleep inside spinning locks such as bit
+ * locks. However, for RTPREEMPT, performance is less of an issue than
+ * correctness, so we trade off the memory and cache footprint of a spinlock 
per
+ * list so the list locks are converted to sleeping locks and work correctly
+ * with PREEMPT_RT kernels.
+ *
+ * An added advantage of this is that we can use the same trick when lockdep is
+ * enabled (again, performance doesn't matter) and gain lockdep coverage of all
+ * the hash-bl operations.
+ *
+ * For modification operations when using pure bit locking, the 0 bit of
+ * hlist_bl_head->first pointer must be set.
  *
  * With some small modifications, this can easily be adapted to store several
  * arbitrary bits (not just a single lock bit), if the need arises to store
@@ -30,16 +43,21 @@
 #define LIST_BL_BUG_ON(x)
 #endif
 
+#undef LIST_BL_USE_SPINLOCKS
+#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_LOCKDEP)
+#define LIST_BL_USE_SPINLOCKS  1
+#endif
 
 struct hlist_bl_head {
struct hlist_bl_node *first;
+#ifdef LIST_BL_USE_SPINLOCKS
+   spinlock_t lock;
+#endif
 };
 
 struct hlist_bl_node {
struct hlist_bl_node *next, **pprev;
 };
-#define INIT_HLIST_BL_HEAD(ptr) \
-   ((ptr)->first = NULL)
 
 static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h)
 {
@@ -54,6 +72,69 @@ static inline bool  hlist_bl_unhashed(const struct 
hlist_bl_node *h)
return !h->pprev;
 }
 
+#ifdef LIST_BL_USE_SPINLOCKS
+#define INIT_HLIST_BL_HEAD(ptr) do { \
+   (ptr)->first = NULL; \
+   spin_lock_init(&(ptr)->lock); \
+} while (0)
+
+static inline void hlist_bl_lock(struct hlist_bl_head *b)
+{
+   spin_lock(>lock);
+}
+
+static inline void hlist_bl_unlock(struct hlist_bl_head *b)
+{
+   spin_unlock(>lock);
+}
+
+static inline bool hlist_bl_is_locked(struct hlist_bl_head *b)
+{
+   return spin_is_locked(>lock);
+}
+
+static inline struct hlist_bl_node *hlist_bl_first(struct hlist_bl_head *h)
+{
+   return h->first;
+}
+
+static inline void hlist_bl_set_first(struct hlist_bl_head *h,
+   struct hlist_bl_node *n)
+{
+   h->first = n;
+}
+
+static inline void hlist_bl_set_before(struct hlist_bl_node **pprev,
+   struct hlist_bl_node *n)
+{
+   WRITE_ONCE(*pprev, n);
+}
+
+static inline bool hlist_bl_empty(const struct hlist_bl_head *h)
+{
+   return !READ_ONCE(h->first);
+}
+
+#else /* !LIST_BL_USE_SPINLOCKS */
+
+#define INIT_HLIST_BL_HEAD(ptr) \
+   ((ptr)->first = NULL)
+
+static inline void hlist_bl_lock(struct hlist_bl_head *b)
+{
+   bit_spin_lock(0, (unsigned long *)b);
+}
+
+static inline void hlist_bl_unlock(struct hlist_bl_head *b)
+{
+   __bit_spin_unlock(0, (unsigned long *)b);
+}
+
+static inline bool hlist_bl_is_locked(struct hlist_bl_head *b)
+{
+   return bit_spin_is_locked(0, (unsigned long *)b);
+}
+
 static inline struct hlist_bl_node *hlist_bl_first(struct hlist_bl_head *h)
 {
return (struct hlist_bl_node *)
@@ -69,11 +150,21 @@ static inline void hlist_bl_set_first(struct hlist_bl_head 
*h,
h->first = (struct hlist_bl_node *)((unsigned long)n | 
LIST_BL_LOCKMASK);
 }
 
+static inline void hlist_bl_set_before(struct hlist_bl_node **pprev,
+   struct hlist_bl_node *n)
+{
+   WRITE_ONCE(*pprev,
+  (struct