> Meantime please try the following patch and see if difference can be
made.
>
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 49d73d979cb3..d2abec3b0f60 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -589,7 +589,7 @@ static void __blk_mq_complete_request(struct request *rq)
>  	 * So complete IO reqeust in softirq context in case of single queue
>  	 * for not degrading IO performance by irqsoff latency.
>  	 */
> -	if (q->nr_hw_queues == 1) {
> +	if (q->nr_hw_queues == 1 || (rq->mq_hctx->flags & BLK_MQ_F_HOST_TAGS)) {
>  		__blk_complete_request(rq);
>  		return;
>  	}
> @@ -1977,7 +1977,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
>  		/* bypass scheduler for flush rq */
>  		blk_insert_flush(rq);
>  		blk_mq_run_hw_queue(data.hctx, true);
> -	} else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs)) {
> +	} else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs ||
> +			(data.hctx->flags & BLK_MQ_F_HOST_TAGS))) {
>  		/*
>  		 * Use plugging if we have a ->commit_rqs() hook as well, as
>  		 * we know the driver uses bd->last in a smart fashion.
Ming -

I tried the above patch and saw no improvement in performance.
Below is the perf record data — the lock contention is in tag allocation
(blk_mq_get_tag):
    6.67%  6.67%  fio  [kernel.vmlinux]  [k] native_queued_spin_lock_slowpath
    - 6.67% io_submit
       - 6.66% entry_SYSCALL_64
          - do_syscall_64
             - 6.66% __x64_sys_io_submit
                - 6.66% io_submit_one
                   - 6.66% aio_read
                      - 6.66% generic_file_read_iter
                         - 6.66% blkdev_direct_IO
                            - 6.65% submit_bio
                               - generic_make_request
                                  - 6.65% blk_mq_make_request
                                     - 6.65% blk_mq_get_request
                                        - 6.65% blk_mq_get_tag
                                           - 6.58% prepare_to_wait_exclusive
                                              - 6.57% _raw_spin_lock_irqsave
                                                   queued_spin_lock_slowpath
>
> thanks,
> Ming