On 12/17/18 5:55 AM, Jens Axboe wrote:
> On 12/17/18 5:08 AM, Ming Lei wrote:
>> When a request is added to the rq list of a sw queue (ctx), the rq may be from
>> a different type of hctx, especially after multi queue mapping is introduced.
>>
>> So when dispatching requests from a sw queue via blk_mq_flush_busy_ctxs() or
>> blk_mq_dequeue_from_ctx(), a request belonging to another queue type of
>> hctx can be dispatched to the current hctx if the read queue or poll queue
>> is enabled.
>>
>> This patch fixes this issue by introducing a per-queue-type list.
> 
> Looks good, just one comment:
> 
>> diff --git a/block/blk-mq.h b/block/blk-mq.h
>> index d1ed096723fb..0973a91eb1dd 100644
>> --- a/block/blk-mq.h
>> +++ b/block/blk-mq.h
>> @@ -12,14 +12,16 @@ struct blk_mq_ctxs {
>>      struct blk_mq_ctx __percpu      *queue_ctx;
>>  };
>>  
>> +struct blk_mq_ctx_list {
>> +    spinlock_t              lock;
>> +    struct list_head        rq_list;
>> +}  ____cacheline_aligned_in_smp;
>> +
>>  /**
>>   * struct blk_mq_ctx - State for a software queue facing the submitting CPUs
>>   */
>>  struct blk_mq_ctx {
>> -    struct {
>> -            spinlock_t              lock;
>> -            struct list_head        rq_list;
>> -    }  ____cacheline_aligned_in_smp;
>> +    struct blk_mq_ctx_list  list[HCTX_MAX_TYPES];
> 
> Let's not make that use 3 cachelines. There is no good reason to split
> these across cachelines, if we have heavy traffic to multiple of these,
> then we're not going very fast anyway. So just make it:
> 
>       struct {
>               spinlock_t              lock;
>               struct list_head        rq_list[HCTX_MAX_TYPES];
>       }  ____cacheline_aligned_in_smp;

I did it just to check; it turns out fine and is, of course, fewer changes.
Let me know.


diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 1e12033be9ea..90d68760af08 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -652,36 +652,43 @@ static int hctx_dispatch_busy_show(void *data, struct 
seq_file *m)
        return 0;
 }
 
-static void *ctx_rq_list_start(struct seq_file *m, loff_t *pos)
-       __acquires(&ctx->lock)
-{
-       struct blk_mq_ctx *ctx = m->private;
-
-       spin_lock(&ctx->lock);
-       return seq_list_start(&ctx->rq_list, *pos);
-}
-
-static void *ctx_rq_list_next(struct seq_file *m, void *v, loff_t *pos)
-{
-       struct blk_mq_ctx *ctx = m->private;
-
-       return seq_list_next(v, &ctx->rq_list, pos);
-}
+#define CTX_RQ_SEQ_OPS(name, type)                                     \
+static void *ctx_##name##_rq_list_start(struct seq_file *m, loff_t *pos) \
+       __acquires(&ctx->lock)                                          \
+{                                                                      \
+       struct blk_mq_ctx *ctx = m->private;                            \
+                                                                       \
+       spin_lock(&ctx->lock);                                          \
+       return seq_list_start(&ctx->rq_lists[type], *pos);              \
+}                                                                      \
+                                                                       \
+static void *ctx_##name##_rq_list_next(struct seq_file *m, void *v,    \
+                                    loff_t *pos)                       \
+{                                                                      \
+       struct blk_mq_ctx *ctx = m->private;                            \
+                                                                       \
+       return seq_list_next(v, &ctx->rq_lists[type], pos);             \
+}                                                                      \
+                                                                       \
+static void ctx_##name##_rq_list_stop(struct seq_file *m, void *v)     \
+       __releases(&ctx->lock)                                          \
+{                                                                      \
+       struct blk_mq_ctx *ctx = m->private;                            \
+                                                                       \
+       spin_unlock(&ctx->lock);                                        \
+}                                                                      \
+                                                                       \
+static const struct seq_operations ctx_##name##_rq_list_seq_ops = {    \
+       .start  = ctx_##name##_rq_list_start,                           \
+       .next   = ctx_##name##_rq_list_next,                            \
+       .stop   = ctx_##name##_rq_list_stop,                            \
+       .show   = blk_mq_debugfs_rq_show,                               \
+}
+
+CTX_RQ_SEQ_OPS(default, HCTX_TYPE_DEFAULT);
+CTX_RQ_SEQ_OPS(read, HCTX_TYPE_READ);
+CTX_RQ_SEQ_OPS(poll, HCTX_TYPE_POLL);
 
-static void ctx_rq_list_stop(struct seq_file *m, void *v)
-       __releases(&ctx->lock)
-{
-       struct blk_mq_ctx *ctx = m->private;
-
-       spin_unlock(&ctx->lock);
-}
-
-static const struct seq_operations ctx_rq_list_seq_ops = {
-       .start  = ctx_rq_list_start,
-       .next   = ctx_rq_list_next,
-       .stop   = ctx_rq_list_stop,
-       .show   = blk_mq_debugfs_rq_show,
-};
 static int ctx_dispatched_show(void *data, struct seq_file *m)
 {
        struct blk_mq_ctx *ctx = data;
@@ -819,7 +826,9 @@ static const struct blk_mq_debugfs_attr 
blk_mq_debugfs_hctx_attrs[] = {
 };
 
 static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = {
-       {"rq_list", 0400, .seq_ops = &ctx_rq_list_seq_ops},
+       {"default_rq_list", 0400, .seq_ops = &ctx_default_rq_list_seq_ops},
+       {"read_rq_list", 0400, .seq_ops = &ctx_read_rq_list_seq_ops},
+       {"poll_rq_list", 0400, .seq_ops = &ctx_poll_rq_list_seq_ops},
        {"dispatched", 0600, ctx_dispatched_show, ctx_dispatched_write},
        {"merged", 0600, ctx_merged_show, ctx_merged_write},
        {"completed", 0600, ctx_completed_show, ctx_completed_write},
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 1efd781fcdea..5af40bbf4fc6 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -301,11 +301,14 @@ EXPORT_SYMBOL_GPL(blk_mq_bio_list_merge);
  * too much time checking for merges.
  */
 static bool blk_mq_attempt_merge(struct request_queue *q,
+                                struct blk_mq_hw_ctx *hctx,
                                 struct blk_mq_ctx *ctx, struct bio *bio)
 {
+       enum hctx_type type = hctx->type;
+
        lockdep_assert_held(&ctx->lock);
 
-       if (blk_mq_bio_list_merge(q, &ctx->rq_list, bio)) {
+       if (blk_mq_bio_list_merge(q, &ctx->rq_lists[type], bio)) {
                ctx->rq_merged++;
                return true;
        }
@@ -319,17 +322,19 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, 
struct bio *bio)
        struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
        struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx->cpu);
        bool ret = false;
+       enum hctx_type type;
 
        if (e && e->type->ops.bio_merge) {
                blk_mq_put_ctx(ctx);
                return e->type->ops.bio_merge(hctx, bio);
        }
 
+       type = hctx->type;
        if ((hctx->flags & BLK_MQ_F_SHOULD_MERGE) &&
-                       !list_empty_careful(&ctx->rq_list)) {
+                       !list_empty_careful(&ctx->rq_lists[type])) {
                /* default per sw-queue merge */
                spin_lock(&ctx->lock);
-               ret = blk_mq_attempt_merge(q, ctx, bio);
+               ret = blk_mq_attempt_merge(q, hctx, ctx, bio);
                spin_unlock(&ctx->lock);
        }
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 313f28b2d079..1546e88fe59c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -958,9 +958,10 @@ static bool flush_busy_ctx(struct sbitmap *sb, unsigned 
int bitnr, void *data)
        struct flush_busy_ctx_data *flush_data = data;
        struct blk_mq_hw_ctx *hctx = flush_data->hctx;
        struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
+       enum hctx_type type = hctx->type;
 
        spin_lock(&ctx->lock);
-       list_splice_tail_init(&ctx->rq_list, flush_data->list);
+       list_splice_tail_init(&ctx->rq_lists[type], flush_data->list);
        sbitmap_clear_bit(sb, bitnr);
        spin_unlock(&ctx->lock);
        return true;
@@ -992,12 +993,13 @@ static bool dispatch_rq_from_ctx(struct sbitmap *sb, 
unsigned int bitnr,
        struct dispatch_rq_data *dispatch_data = data;
        struct blk_mq_hw_ctx *hctx = dispatch_data->hctx;
        struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
+       enum hctx_type type = hctx->type;
 
        spin_lock(&ctx->lock);
-       if (!list_empty(&ctx->rq_list)) {
-               dispatch_data->rq = list_entry_rq(ctx->rq_list.next);
+       if (!list_empty(&ctx->rq_lists[type])) {
+               dispatch_data->rq = list_entry_rq(ctx->rq_lists[type].next);
                list_del_init(&dispatch_data->rq->queuelist);
-               if (list_empty(&ctx->rq_list))
+               if (list_empty(&ctx->rq_lists[type]))
                        sbitmap_clear_bit(sb, bitnr);
        }
        spin_unlock(&ctx->lock);
@@ -1608,15 +1610,16 @@ static inline void __blk_mq_insert_req_list(struct 
blk_mq_hw_ctx *hctx,
                                            bool at_head)
 {
        struct blk_mq_ctx *ctx = rq->mq_ctx;
+       enum hctx_type type = hctx->type;
 
        lockdep_assert_held(&ctx->lock);
 
        trace_block_rq_insert(hctx->queue, rq);
 
        if (at_head)
-               list_add(&rq->queuelist, &ctx->rq_list);
+               list_add(&rq->queuelist, &ctx->rq_lists[type]);
        else
-               list_add_tail(&rq->queuelist, &ctx->rq_list);
+               list_add_tail(&rq->queuelist, &ctx->rq_lists[type]);
 }
 
 void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
@@ -1651,6 +1654,7 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, 
struct blk_mq_ctx *ctx,
 
 {
        struct request *rq;
+       enum hctx_type type = hctx->type;
 
        /*
         * preemption doesn't flush plug list, so it's possible ctx->cpu is
@@ -1662,7 +1666,7 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, 
struct blk_mq_ctx *ctx,
        }
 
        spin_lock(&ctx->lock);
-       list_splice_tail_init(list, &ctx->rq_list);
+       list_splice_tail_init(list, &ctx->rq_lists[type]);
        blk_mq_hctx_mark_pending(hctx, ctx);
        spin_unlock(&ctx->lock);
 }
@@ -2200,13 +2204,15 @@ static int blk_mq_hctx_notify_dead(unsigned int cpu, 
struct hlist_node *node)
        struct blk_mq_hw_ctx *hctx;
        struct blk_mq_ctx *ctx;
        LIST_HEAD(tmp);
+       enum hctx_type type;
 
        hctx = hlist_entry_safe(node, struct blk_mq_hw_ctx, cpuhp_dead);
        ctx = __blk_mq_get_ctx(hctx->queue, cpu);
+       type = hctx->type;
 
        spin_lock(&ctx->lock);
-       if (!list_empty(&ctx->rq_list)) {
-               list_splice_init(&ctx->rq_list, &tmp);
+       if (!list_empty(&ctx->rq_lists[type])) {
+               list_splice_init(&ctx->rq_lists[type], &tmp);
                blk_mq_hctx_clear_pending(hctx, ctx);
        }
        spin_unlock(&ctx->lock);
@@ -2343,10 +2349,14 @@ static void blk_mq_init_cpu_queues(struct request_queue 
*q,
        for_each_possible_cpu(i) {
                struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i);
                struct blk_mq_hw_ctx *hctx;
+               int k;
 
                __ctx->cpu = i;
+
                spin_lock_init(&__ctx->lock);
-               INIT_LIST_HEAD(&__ctx->rq_list);
+               for (k = HCTX_TYPE_DEFAULT; k < HCTX_MAX_TYPES; k++) {
+                       INIT_LIST_HEAD(&__ctx->rq_lists[k]);
+               }
                __ctx->queue = q;
 
                /*
diff --git a/block/blk-mq.h b/block/blk-mq.h
index d1ed096723fb..d943d46b0785 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -18,8 +18,8 @@ struct blk_mq_ctxs {
 struct blk_mq_ctx {
        struct {
                spinlock_t              lock;
-               struct list_head        rq_list;
-       }  ____cacheline_aligned_in_smp;
+               struct list_head        rq_lists[HCTX_MAX_TYPES];
+       } ____cacheline_aligned_in_smp;
 
        unsigned int            cpu;
        unsigned short          index_hw[HCTX_MAX_TYPES];

-- 
Jens Axboe

Reply via email to