On 02/03/2018 05:21 AM, Ming Lei wrote:
> Quite a few HBAs (such as HPSA, megaraid, mpt3sas, ...) support multiple
> reply queues, but the tag space is often HBA wide.
>
> These HBAs have switched to use pci_alloc_irq_vectors(PCI_IRQ_AFFINITY)
> for automatic affinity assignment.
>
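Just for context, the "automatic affinity assignment" mentioned above is what
these drivers get from something along the lines of the sketch below; 'hba',
'pdev' and 'nr_reply_queues' are placeholder names, not taken from any
particular driver:

	/* one MSI-X vector per reply queue, spread by the core across
	 * all possible CPUs via the managed-affinity machinery */
	int nvecs = pci_alloc_irq_vectors(hba->pdev, 1, hba->nr_reply_queues,
					  PCI_IRQ_MSIX | PCI_IRQ_AFFINITY);
	if (nvecs < 0)
		return nvecs;
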
> Now that 84676c1f21e8ff5 ("genirq/affinity: assign vectors to all possible
> CPUs") has been merged into v4.16-rc, it is easy for some irq vectors to
> end up with only offline CPUs assigned, and this can't be avoided even if
> the allocation is improved.
>
> So all these drivers have to avoid asking the HBA to complete a request
> on a reply queue that has no online CPUs assigned, and HPSA has been
> broken with v4.15+:
>
> https://marc.info/?l=linux-kernel&m=151748144730409&w=2
>
> This issue can be solved generically and easily via blk-mq (scsi-mq)
> multiple hw queues by mapping each reply queue to a hctx, but one tricky
> thing is the HBA-wide (instead of hw-queue-wide) tags.
>
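To make the intended usage concrete, a driver that maps each reply queue to a
hctx while keeping one HBA-wide tag space would presumably set up its tag set
along these lines. This is only a sketch against this patch: everything
prefixed with my_hba_ (ops, command struct, queue counts) is made up, and
only BLK_MQ_F_GLOBAL_TAGS comes from the patch itself:

static int my_hba_init_tag_set(struct my_hba *hba)
{
	struct blk_mq_tag_set *set = &hba->tag_set;

	memset(set, 0, sizeof(*set));
	set->ops	  = &my_hba_mq_ops;	/* hypothetical blk_mq_ops */
	/* one hctx per HBA reply queue; the cpu -> hctx mapping would come
	 * from ->map_queues, e.g. derived from the managed irq affinity */
	set->nr_hw_queues = hba->nr_reply_queues;
	/* HBA-wide queue depth, shared by all hctxs via set->global_tags */
	set->queue_depth  = hba->can_queue;
	set->cmd_size	  = sizeof(struct my_hba_cmd);
	set->numa_node	  = NUMA_NO_NODE;
	set->flags	  = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_GLOBAL_TAGS;

	return blk_mq_alloc_tag_set(set);
}
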
> This patch is based on the following patch from Hannes:
>
> https://marc.info/?l=linux-block&m=149132580511346&w=2
>
> One big difference from Hannes's patch is that this one only makes the
> tags sbitmap and the active_queues data structure HBA wide; everything
> else (request, hctx, tags, ...) keeps its NUMA locality.
>
> The following patch adds global tags support to null_blk, and performance
> data is provided there; no obvious performance loss is observed when the
> total hw queue depth is the same.
>
> Cc: Hannes Reinecke <[email protected]>
> Cc: Arun Easi <[email protected]>
> Cc: Omar Sandoval <[email protected]>
> Cc: "Martin K. Petersen" <[email protected]>
> Cc: James Bottomley <[email protected]>
> Cc: Christoph Hellwig <[email protected]>
> Cc: Don Brace <[email protected]>
> Cc: Kashyap Desai <[email protected]>
> Cc: Peter Rivera <[email protected]>
> Cc: Laurence Oberman <[email protected]>
> Cc: Mike Snitzer <[email protected]>
> Signed-off-by: Ming Lei <[email protected]>
> ---
> block/blk-mq-debugfs.c | 1 +
> block/blk-mq-sched.c | 2 +-
> block/blk-mq-tag.c | 23 ++++++++++++++++++-----
> block/blk-mq-tag.h | 5 ++++-
> block/blk-mq.c | 29 ++++++++++++++++++++++++-----
> block/blk-mq.h | 3 ++-
> include/linux/blk-mq.h | 2 ++
> 7 files changed, 52 insertions(+), 13 deletions(-)
>
> diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
> index 0dfafa4b655a..0f0fafe03f5d 100644
> --- a/block/blk-mq-debugfs.c
> +++ b/block/blk-mq-debugfs.c
> @@ -206,6 +206,7 @@ static const char *const hctx_flag_name[] = {
> HCTX_FLAG_NAME(SHOULD_MERGE),
> HCTX_FLAG_NAME(TAG_SHARED),
> HCTX_FLAG_NAME(SG_MERGE),
> + HCTX_FLAG_NAME(GLOBAL_TAGS),
> HCTX_FLAG_NAME(BLOCKING),
> HCTX_FLAG_NAME(NO_SCHED),
> };
> diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
> index 55c0a745b427..191d4bc95f0e 100644
> --- a/block/blk-mq-sched.c
> +++ b/block/blk-mq-sched.c
> @@ -495,7 +495,7 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q,
> int ret;
>
> hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests,
> - set->reserved_tags);
> + set->reserved_tags, false);
> if (!hctx->sched_tags)
> return -ENOMEM;
>
> diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
> index 571797dc36cb..66377d09eaeb 100644
> --- a/block/blk-mq-tag.c
> +++ b/block/blk-mq-tag.c
> @@ -379,9 +379,11 @@ static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
> return NULL;
> }
>
> -struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
> +struct blk_mq_tags *blk_mq_init_tags(struct blk_mq_tag_set *set,
> + unsigned int total_tags,
> unsigned int reserved_tags,
> - int node, int alloc_policy)
> + int node, int alloc_policy,
> + bool global_tag)
> {
> struct blk_mq_tags *tags;
>
> @@ -397,6 +399,14 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
> tags->nr_tags = total_tags;
> tags->nr_reserved_tags = reserved_tags;
>
> + WARN_ON(global_tag && !set->global_tags);
> + if (global_tag && set->global_tags) {
> + tags->bitmap_tags = set->global_tags->bitmap_tags;
> + tags->breserved_tags = set->global_tags->breserved_tags;
> + tags->active_queues = set->global_tags->active_queues;
> + tags->global_tags = true;
> + return tags;
> + }
> tags->bitmap_tags = &tags->__bitmap_tags;
> tags->breserved_tags = &tags->__breserved_tags;
> tags->active_queues = &tags->__active_queues;
Do we really need the 'global_tag' flag here?
Can't we just rely on the ->global_tags pointer to be set?
> @@ -406,8 +416,10 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
>
> void blk_mq_free_tags(struct blk_mq_tags *tags)
> {
> - sbitmap_queue_free(tags->bitmap_tags);
> - sbitmap_queue_free(tags->breserved_tags);
> + if (!tags->global_tags) {
> + sbitmap_queue_free(tags->bitmap_tags);
> + sbitmap_queue_free(tags->breserved_tags);
> + }
> kfree(tags);
> }
>
> @@ -441,7 +453,8 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
> if (tdepth > 16 * BLKDEV_MAX_RQ)
> return -EINVAL;
>
> - new = blk_mq_alloc_rq_map(set, hctx->queue_num, tdepth, 0);
> + new = blk_mq_alloc_rq_map(set, hctx->queue_num, tdepth, 0,
> + (*tagsptr)->global_tags);
> if (!new)
> return -ENOMEM;
> ret = blk_mq_alloc_rqs(set, new, hctx->queue_num, tdepth);
> diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
> index a68323fa0c02..a87b5cfa5726 100644
> --- a/block/blk-mq-tag.h
> +++ b/block/blk-mq-tag.h
> @@ -20,13 +20,16 @@ struct blk_mq_tags {
> struct sbitmap_queue __bitmap_tags;
> struct sbitmap_queue __breserved_tags;
>
> + bool global_tags;
> struct request **rqs;
> struct request **static_rqs;
> struct list_head page_list;
> };
>
>
> -extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node, int alloc_policy);
> +extern struct blk_mq_tags *blk_mq_init_tags(struct blk_mq_tag_set *set,
> + unsigned int nr_tags, unsigned int reserved_tags, int node,
> + int alloc_policy, bool global_tag);
> extern void blk_mq_free_tags(struct blk_mq_tags *tags);
>
> extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 69d4534870af..a98466dc71b5 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -2023,7 +2023,8 @@ void blk_mq_free_rq_map(struct blk_mq_tags *tags)
> struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
> unsigned int hctx_idx,
> unsigned int nr_tags,
> - unsigned int reserved_tags)
> + unsigned int reserved_tags,
> + bool global_tags)
> {
> struct blk_mq_tags *tags;
> int node;
> @@ -2032,8 +2033,9 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
> if (node == NUMA_NO_NODE)
> node = set->numa_node;
>
> - tags = blk_mq_init_tags(nr_tags, reserved_tags, node,
> - BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags));
> + tags = blk_mq_init_tags(set, nr_tags, reserved_tags, node,
> + BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags),
> + global_tags);
> if (!tags)
> return NULL;
> 
> @@ -2336,7 +2338,8 @@ static bool __blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, int hctx_idx)
> int ret = 0;
>
> set->tags[hctx_idx] = blk_mq_alloc_rq_map(set, hctx_idx,
> - set->queue_depth, set->reserved_tags);
> + set->queue_depth, set->reserved_tags,
> + !!(set->flags & BLK_MQ_F_GLOBAL_TAGS));
> if (!set->tags[hctx_idx])
> return false;
>
> @@ -2891,15 +2894,28 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
> if (ret)
> goto out_free_mq_map;
>
> + if (set->flags & BLK_MQ_F_GLOBAL_TAGS) {
> + ret = -ENOMEM;
> + set->global_tags = blk_mq_init_tags(set, set->queue_depth,
> + set->reserved_tags, set->numa_node,
> + BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags),
> + false);
> + if (!set->global_tags)
> + goto out_free_mq_map;
> + }
> +
> ret = blk_mq_alloc_rq_maps(set);
> if (ret)
> - goto out_free_mq_map;
> + goto out_free_global_tags;
>
> mutex_init(&set->tag_list_lock);
> INIT_LIST_HEAD(&set->tag_list);
>
> return 0;
>
> +out_free_global_tags:
> + if (set->global_tags)
> + blk_mq_free_tags(set->global_tags);
> out_free_mq_map:
> kfree(set->mq_map);
> set->mq_map = NULL;
> @@ -2914,6 +2930,9 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
> {
> int i;
>
> + if (set->global_tags)
> + blk_mq_free_tags(set->global_tags);
> +
> for (i = 0; i < nr_cpu_ids; i++)
> blk_mq_free_map_and_requests(set, i);
>
> diff --git a/block/blk-mq.h b/block/blk-mq.h
> index 88c558f71819..d1d9a0a8e1fa 100644
> --- a/block/blk-mq.h
> +++ b/block/blk-mq.h
> @@ -61,7 +61,8 @@ void blk_mq_free_rq_map(struct blk_mq_tags *tags);
> struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
> unsigned int hctx_idx,
> unsigned int nr_tags,
> - unsigned int reserved_tags);
> + unsigned int reserved_tags,
> + bool global_tags);
> int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
> unsigned int hctx_idx, unsigned int depth);
>
> diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
> index 8efcf49796a3..8548c72d6b4a 100644
> --- a/include/linux/blk-mq.h
> +++ b/include/linux/blk-mq.h
> @@ -82,6 +82,7 @@ struct blk_mq_tag_set {
> void *driver_data;
>
> struct blk_mq_tags **tags;
> + struct blk_mq_tags *global_tags; /* for BLK_MQ_F_GLOBAL_TAGS */
>
> struct mutex tag_list_lock;
> struct list_head tag_list;
> @@ -175,6 +176,7 @@ enum {
> BLK_MQ_F_SHOULD_MERGE = 1 << 0,
> BLK_MQ_F_TAG_SHARED = 1 << 1,
> BLK_MQ_F_SG_MERGE = 1 << 2,
> + BLK_MQ_F_GLOBAL_TAGS = 1 << 3,
> BLK_MQ_F_BLOCKING = 1 << 5,
> BLK_MQ_F_NO_SCHED = 1 << 6,
> BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
>
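And just to confirm my understanding of the allocation path: with
BLK_MQ_F_GLOBAL_TAGS set, after blk_mq_alloc_tag_set() every per-hctx tags
structure aliases the bitmaps in set->global_tags, so a driver tag is unique
across the whole HBA and can index a single per-HBA command table, roughly as
below ('hba->cmd_table' and 'my_hba_cmd' are hypothetical driver details):

	int i;

	/* every hctx draws from the one HBA-wide sbitmap ... */
	for (i = 0; i < set->nr_hw_queues; i++)
		WARN_ON(set->tags[i]->bitmap_tags !=
			set->global_tags->bitmap_tags);

	/* ... so rq->tag is unique HBA-wide, no matter which hctx
	 * allocated the request, and can address one command table */
	struct my_hba_cmd *cmd = hba->cmd_table[rq->tag];
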
Otherwise:
Reviewed-by: Hannes Reinecke <[email protected]>
Cheers,
Hannes
--
Dr. Hannes Reinecke Teamlead Storage & Networking
[email protected] +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)