On Fri, Nov 30, 2018 at 09:01:17AM -0700, Jens Axboe wrote:
> sbitmap maintains a set of words that we use to set and clear bits, with
> each bit representing a tag for blk-mq. Even though we spread the bits
> out and maintain a hint cache, one particular bit allocated will end up
> being cleared in the exact same spot.
>
> This introduces batched clearing of bits. Instead of clearing a given
> bit, the same bit is set in a cleared/free mask instead. If we fail
> allocating a bit from a given word, then we check the free mask, and
> batch move those cleared bits at that time. This trades 64 atomic bitops
> for 2 cmpxchg().
>
> In a threaded poll test case, half the overhead of getting and clearing
> tags is removed with this change. On another poll test case with a
> single thread, performance is unchanged.
>
> Signed-off-by: Jens Axboe <[email protected]>
> ---
> include/linux/sbitmap.h | 31 +++++++++++++---
> lib/sbitmap.c | 80 +++++++++++++++++++++++++++++++++++++----
> 2 files changed, 100 insertions(+), 11 deletions(-)
>
> diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
> index 804a50983ec5..07f117ee19dc 100644
> --- a/include/linux/sbitmap.h
> +++ b/include/linux/sbitmap.h
> @@ -30,14 +30,24 @@ struct seq_file;
> */
> struct sbitmap_word {
> /**
> - * @word: The bitmap word itself.
> + * @depth: Number of bits being used in @word/@cleared
> */
> - unsigned long word;
> + unsigned long depth;
>
> /**
> - * @depth: Number of bits being used in @word.
> + * @word: word holding free bits
> */
> - unsigned long depth;
> + unsigned long word ____cacheline_aligned_in_smp;
Still splitting up word and depth in separate cachelines?
> + /**
> + * @cleared: word holding cleared bits
> + */
> + unsigned long cleared ____cacheline_aligned_in_smp;
> +
> + /**
> + * @swap_lock: Held while swapping word <-> cleared
> + */
> + spinlock_t swap_lock;
> } ____cacheline_aligned_in_smp;
>
> /**
> @@ -310,6 +320,19 @@ static inline void sbitmap_clear_bit(struct sbitmap *sb,
> unsigned int bitnr)
> clear_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr));
> }
>
> +/*
> + * This one is special, since it doesn't actually clear the bit, rather it
> + * sets the corresponding bit in the ->cleared mask instead. Paired with
> + * the caller doing sbitmap_batch_clear() if a given index is full, which
> + * will clear the previously freed entries in the corresponding ->word.
> + */
> +static inline void sbitmap_deferred_clear_bit(struct sbitmap *sb, unsigned
> int bitnr)
> +{
> + unsigned long *addr = &sb->map[SB_NR_TO_INDEX(sb, bitnr)].cleared;
> +
> + set_bit(SB_NR_TO_BIT(sb, bitnr), addr);
> +}
> +
> static inline void sbitmap_clear_bit_unlock(struct sbitmap *sb,
> unsigned int bitnr)
> {
> diff --git a/lib/sbitmap.c b/lib/sbitmap.c
> index 45cab6bbc1c7..f6a9553617bd 100644
> --- a/lib/sbitmap.c
> +++ b/lib/sbitmap.c
> @@ -59,6 +59,7 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int
> depth, int shift,
> for (i = 0; i < sb->map_nr; i++) {
> sb->map[i].depth = min(depth, bits_per_word);
> depth -= sb->map[i].depth;
> + spin_lock_init(&sb->map[i].swap_lock);
> }
> return 0;
> }
> @@ -111,6 +112,57 @@ static int __sbitmap_get_word(unsigned long *word,
> unsigned long depth,
> return nr;
> }
>
> +/*
> + * See if we have deferred clears that we can batch move
> + */
> +static inline bool sbitmap_deferred_clear(struct sbitmap *sb, int index)
> +{
> + unsigned long mask, val;
> + bool ret = false;
> +
> + spin_lock(&sb->map[index].swap_lock);
> +
> + if (!sb->map[index].cleared)
> + goto out_unlock;
> +
> + /*
> + * First get a stable cleared mask, setting the old mask to 0.
> + */
> + do {
> + mask = sb->map[index].cleared;
> + } while (cmpxchg(&sb->map[index].cleared, mask, 0) != mask);
> +
> + /*
> + * Now clear the masked bits in our free word
> + */
> + do {
> + val = sb->map[index].word;
> + } while (cmpxchg(&sb->map[index].word, val, val & ~mask) != val);
> +
> + ret = true;
> +out_unlock:
> + spin_unlock(&sb->map[index].swap_lock);
> + return ret;
> +}
Okay, I couldn't find any holes in this one :)
> +static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index,
> + unsigned int alloc_hint, bool round_robin)
> +{
> + int nr;
> +
> + do {
> + nr = __sbitmap_get_word(&sb->map[index].word,
> + sb->map[index].depth, alloc_hint,
> + !round_robin);
> + if (nr != -1)
> + break;
> + if (!sbitmap_deferred_clear(sb, index))
> + break;
> + } while (1);
> +
> + return nr;
> +}
> +
> int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool
> round_robin)
> {
> unsigned int i, index;
> @@ -129,9 +181,8 @@ int sbitmap_get(struct sbitmap *sb, unsigned int
> alloc_hint, bool round_robin)
> alloc_hint = 0;
>
> for (i = 0; i < sb->map_nr; i++) {
> - nr = __sbitmap_get_word(&sb->map[index].word,
> - sb->map[index].depth, alloc_hint,
> - !round_robin);
> + nr = sbitmap_find_bit_in_index(sb, index, alloc_hint,
> + round_robin);
> if (nr != -1) {
> nr += index << sb->shift;
> break;
> @@ -206,23 +257,37 @@ bool sbitmap_any_bit_clear(const struct sbitmap *sb)
> }
> EXPORT_SYMBOL_GPL(sbitmap_any_bit_clear);
>
> -unsigned int sbitmap_weight(const struct sbitmap *sb)
> +static unsigned int __sbitmap_weight(const struct sbitmap *sb, bool set)
> {
> unsigned int i, weight = 0;
>
> for (i = 0; i < sb->map_nr; i++) {
> const struct sbitmap_word *word = &sb->map[i];
>
> - weight += bitmap_weight(&word->word, word->depth);
> + if (set)
> + weight += bitmap_weight(&word->word, word->depth);
Should probably do
weight -= bitmap_weight(&word->cleared, word->depth);
too, right?
> + else
> + weight += bitmap_weight(&word->cleared, word->depth);
> }
> return weight;
> }
> +
> +unsigned int sbitmap_weight(const struct sbitmap *sb)
> +{
> + return __sbitmap_weight(sb, true);
> +}
> EXPORT_SYMBOL_GPL(sbitmap_weight);
>
> +static unsigned int sbitmap_cleared(const struct sbitmap *sb)
> +{
> + return __sbitmap_weight(sb, false);
> +}
> +
> void sbitmap_show(struct sbitmap *sb, struct seq_file *m)
> {
> seq_printf(m, "depth=%u\n", sb->depth);
> - seq_printf(m, "busy=%u\n", sbitmap_weight(sb));
> + seq_printf(m, "busy=%u\n", sbitmap_weight(sb) - sbitmap_cleared(sb));
> + seq_printf(m, "cleared=%u\n", sbitmap_cleared(sb));
> seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift);
> seq_printf(m, "map_nr=%u\n", sb->map_nr);
> }
> @@ -514,7 +579,8 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);
> void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
> unsigned int cpu)
> {
> - sbitmap_clear_bit_unlock(&sbq->sb, nr);
> + sbitmap_deferred_clear_bit(&sbq->sb, nr);
> +
> /*
> * Pairs with the memory barrier in set_current_state() to ensure the
> * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker
> --
> 2.17.1
>