On Fri, Nov 30, 2018 at 09:01:17AM -0700, Jens Axboe wrote:
> sbitmap maintains a set of words that we use to set and clear bits, with
> each bit representing a tag for blk-mq. Even though we spread the bits
> out and maintain a hint cache, one particular bit allocated will end up
> being cleared in the exact same spot.
> 
> This introduces batched clearing of bits. Instead of clearing a given
> bit, the same bit is set in a cleared/free mask instead. If we fail
> allocating a bit from a given word, then we check the free mask, and
> batch move those cleared bits at that time. This trades 64 atomic bitops
> for 2 cmpxchg().
> 
> In a threaded poll test case, half the overhead of getting and clearing
> tags is removed with this change. On another poll test case with a
> single thread, performance is unchanged.
> 
> Signed-off-by: Jens Axboe <[email protected]>
> ---
>  include/linux/sbitmap.h | 31 +++++++++++++---
>  lib/sbitmap.c           | 80 +++++++++++++++++++++++++++++++++++++----
>  2 files changed, 100 insertions(+), 11 deletions(-)
> 
> diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
> index 804a50983ec5..07f117ee19dc 100644
> --- a/include/linux/sbitmap.h
> +++ b/include/linux/sbitmap.h
> @@ -30,14 +30,24 @@ struct seq_file;
>   */
>  struct sbitmap_word {
>       /**
> -      * @word: The bitmap word itself.
> +      * @depth: Number of bits being used in @word/@cleared
>        */
> -     unsigned long word;
> +     unsigned long depth;
>  
>       /**
> -      * @depth: Number of bits being used in @word.
> +      * @word: word holding free bits
>        */
> -     unsigned long depth;
> +     unsigned long word ____cacheline_aligned_in_smp;

Still splitting up word and depth in separate cachelines?

> +     /**
> +      * @cleared: word holding cleared bits
> +      */
> +     unsigned long cleared ____cacheline_aligned_in_smp;
> +
> +     /**
> +      * @swap_lock: Held while swapping word <-> cleared
> +      */
> +     spinlock_t swap_lock;
>  } ____cacheline_aligned_in_smp;
>  
>  /**
> @@ -310,6 +320,19 @@ static inline void sbitmap_clear_bit(struct sbitmap *sb, 
> unsigned int bitnr)
>       clear_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr));
>  }
>  
> +/*
> + * This one is special, since it doesn't actually clear the bit, rather it
> + * sets the corresponding bit in the ->cleared mask instead. Paired with
> + * the caller doing sbitmap_batch_clear() if a given index is full, which
> + * will clear the previously freed entries in the corresponding ->word.
> + */
> +static inline void sbitmap_deferred_clear_bit(struct sbitmap *sb, unsigned 
> int bitnr)
> +{
> +     unsigned long *addr = &sb->map[SB_NR_TO_INDEX(sb, bitnr)].cleared;
> +
> +     set_bit(SB_NR_TO_BIT(sb, bitnr), addr);
> +}
> +
>  static inline void sbitmap_clear_bit_unlock(struct sbitmap *sb,
>                                           unsigned int bitnr)
>  {
> diff --git a/lib/sbitmap.c b/lib/sbitmap.c
> index 45cab6bbc1c7..f6a9553617bd 100644
> --- a/lib/sbitmap.c
> +++ b/lib/sbitmap.c
> @@ -59,6 +59,7 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int 
> depth, int shift,
>       for (i = 0; i < sb->map_nr; i++) {
>               sb->map[i].depth = min(depth, bits_per_word);
>               depth -= sb->map[i].depth;
> +             spin_lock_init(&sb->map[i].swap_lock);
>       }
>       return 0;
>  }
> @@ -111,6 +112,57 @@ static int __sbitmap_get_word(unsigned long *word, 
> unsigned long depth,
>       return nr;
>  }
>  
> +/*
> + * See if we have deferred clears that we can batch move
> + */
> +static inline bool sbitmap_deferred_clear(struct sbitmap *sb, int index)
> +{
> +     unsigned long mask, val;
> +     bool ret = false;
> +
> +     spin_lock(&sb->map[index].swap_lock);
> +
> +     if (!sb->map[index].cleared)
> +             goto out_unlock;
> +
> +     /*
> +      * First get a stable cleared mask, setting the old mask to 0.
> +      */
> +     do {
> +             mask = sb->map[index].cleared;
> +     } while (cmpxchg(&sb->map[index].cleared, mask, 0) != mask);
> +
> +     /*
> +      * Now clear the masked bits in our free word
> +      */
> +     do {
> +             val = sb->map[index].word;
> +     } while (cmpxchg(&sb->map[index].word, val, val & ~mask) != val);
> +
> +     ret = true;
> +out_unlock:
> +     spin_unlock(&sb->map[index].swap_lock);
> +     return ret;
> +}

Okay, I couldn't find any holes in this one :)

> +static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index,
> +                                  unsigned int alloc_hint, bool round_robin)
> +{
> +     int nr;
> +
> +     do {
> +             nr = __sbitmap_get_word(&sb->map[index].word,
> +                                     sb->map[index].depth, alloc_hint,
> +                                     !round_robin);
> +             if (nr != -1)
> +                     break;
> +             if (!sbitmap_deferred_clear(sb, index))
> +                     break;
> +     } while (1);
> +
> +     return nr;
> +}
> +
>  int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool 
> round_robin)
>  {
>       unsigned int i, index;
> @@ -129,9 +181,8 @@ int sbitmap_get(struct sbitmap *sb, unsigned int 
> alloc_hint, bool round_robin)
>               alloc_hint = 0;
>  
>       for (i = 0; i < sb->map_nr; i++) {
> -             nr = __sbitmap_get_word(&sb->map[index].word,
> -                                     sb->map[index].depth, alloc_hint,
> -                                     !round_robin);
> +             nr = sbitmap_find_bit_in_index(sb, index, alloc_hint,
> +                                             round_robin);
>               if (nr != -1) {
>                       nr += index << sb->shift;
>                       break;
> @@ -206,23 +257,37 @@ bool sbitmap_any_bit_clear(const struct sbitmap *sb)
>  }
>  EXPORT_SYMBOL_GPL(sbitmap_any_bit_clear);
>  
> -unsigned int sbitmap_weight(const struct sbitmap *sb)
> +static unsigned int __sbitmap_weight(const struct sbitmap *sb, bool set)
>  {
>       unsigned int i, weight = 0;
>  
>       for (i = 0; i < sb->map_nr; i++) {
>               const struct sbitmap_word *word = &sb->map[i];
>  
> -             weight += bitmap_weight(&word->word, word->depth);
> +             if (set)
> +                     weight += bitmap_weight(&word->word, word->depth);

Should probably do
                        weight -= bitmap_weight(&word->cleared, word->depth);

too, right?

> +             else
> +                     weight += bitmap_weight(&word->cleared, word->depth);
>       }
>       return weight;
>  }
> +
> +unsigned int sbitmap_weight(const struct sbitmap *sb)
> +{
> +     return __sbitmap_weight(sb, true);
> +}
>  EXPORT_SYMBOL_GPL(sbitmap_weight);
>  
> +static unsigned int sbitmap_cleared(const struct sbitmap *sb)
> +{
> +     return __sbitmap_weight(sb, false);
> +}
> +
>  void sbitmap_show(struct sbitmap *sb, struct seq_file *m)
>  {
>       seq_printf(m, "depth=%u\n", sb->depth);
> -     seq_printf(m, "busy=%u\n", sbitmap_weight(sb));
> +     seq_printf(m, "busy=%u\n", sbitmap_weight(sb) - sbitmap_cleared(sb));
> +     seq_printf(m, "cleared=%u\n", sbitmap_cleared(sb));
>       seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift);
>       seq_printf(m, "map_nr=%u\n", sb->map_nr);
>  }
> @@ -514,7 +579,8 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);
>  void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
>                        unsigned int cpu)
>  {
> -     sbitmap_clear_bit_unlock(&sbq->sb, nr);
> +     sbitmap_deferred_clear_bit(&sbq->sb, nr);
> +
>       /*
>        * Pairs with the memory barrier in set_current_state() to ensure the
>        * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker
> -- 
> 2.17.1
> 

Reply via email to