Re: [PATCH 3/3] scsi: avoid to hold host-wide counter of host_busy for scsi_mq

2018-04-28 Thread Ming Lei
On Fri, Apr 27, 2018 at 04:16:48PM +, Bart Van Assche wrote:
> On Fri, 2018-04-20 at 14:57 +0800, Ming Lei wrote:
> > +struct scsi_host_mq_in_flight {
> > +   int cnt;
> > +};
> > +
> > +static void scsi_host_check_in_flight(struct request *rq, void *data,
> > +   bool reserved)
> > +{
> > +   struct scsi_host_mq_in_flight *in_flight = data;
> > +
> > +   if (blk_mq_request_started(rq))
> > +   in_flight->cnt++;
> > +}
> > +
> >  /**
> >   * scsi_host_busy - Return the host busy counter
> >   * @shost: Pointer to Scsi_Host to inc.
> >   **/
> >  int scsi_host_busy(struct Scsi_Host *shost)
> >  {
> > -   return atomic_read(&shost->host_busy);
> > +   struct scsi_host_mq_in_flight in_flight = {
> > +   .cnt = 0,
> > +   };
> > +
> > +   if (!shost->use_blk_mq)
> > +   return atomic_read(&shost->host_busy);
> > +
> > +   blk_mq_tagset_busy_iter(&shost->tag_set, scsi_host_check_in_flight,
> > +   &in_flight);
> > +   return in_flight.cnt;
> >  }
> >  EXPORT_SYMBOL(scsi_host_busy);
> 
> This patch introduces a subtle behavior change that has not been explained
> in the commit message. If a SCSI request gets requeued that results in a
> decrease of the .host_busy counter by scsi_device_unbusy() before the request
> is requeued and an increase of the host_busy counter when scsi_queue_rq() is
> called again. During that time such requests have the state MQ_RQ_COMPLETE and
> hence blk_mq_request_started() will return true and 
> scsi_host_check_in_flight()

No, __blk_mq_requeue_request() will change the rq state into MQ_RQ_IDLE,
so there is no such issue as the one you worried about. Please look at
scsi_mq_requeue_cmd(), which calls blk_mq_requeue_request(), which puts
the driver tag and updates the rq's state to IDLE.

> will include these requests. In other words, this patch introduces a subtle
> behavior change that has not been explained in the commit message. Hence I
> doubt that this change is correct.

As I explained above, no such issue.


Thanks,
Ming


Re: [PATCH 3/3] scsi: avoid to hold host-wide counter of host_busy for scsi_mq

2018-04-27 Thread Bart Van Assche
On Fri, 2018-04-20 at 14:57 +0800, Ming Lei wrote:
> +struct scsi_host_mq_in_flight {
> + int cnt;
> +};
> +
> +static void scsi_host_check_in_flight(struct request *rq, void *data,
> + bool reserved)
> +{
> + struct scsi_host_mq_in_flight *in_flight = data;
> +
> + if (blk_mq_request_started(rq))
> + in_flight->cnt++;
> +}
> +
>  /**
>   * scsi_host_busy - Return the host busy counter
>   * @shost:   Pointer to Scsi_Host to inc.
>   **/
>  int scsi_host_busy(struct Scsi_Host *shost)
>  {
> - return atomic_read(&shost->host_busy);
> + struct scsi_host_mq_in_flight in_flight = {
> + .cnt = 0,
> + };
> +
> + if (!shost->use_blk_mq)
> + return atomic_read(&shost->host_busy);
> +
> + blk_mq_tagset_busy_iter(&shost->tag_set, scsi_host_check_in_flight,
> + &in_flight);
> + return in_flight.cnt;
>  }
>  EXPORT_SYMBOL(scsi_host_busy);

This patch introduces a subtle behavior change that has not been explained
in the commit message. If a SCSI request gets requeued, that results in a
decrease of the .host_busy counter by scsi_device_unbusy() before the request
is requeued and an increase of the host_busy counter when scsi_queue_rq() is
called again. During that time such requests have the state MQ_RQ_COMPLETE and
hence blk_mq_request_started() will return true and scsi_host_check_in_flight()
will include these requests. In other words, this patch introduces a subtle
behavior change that has not been explained in the commit message. Hence I
doubt that this change is correct.

Bart.





[PATCH 3/3] scsi: avoid to hold host-wide counter of host_busy for scsi_mq

2018-04-20 Thread Ming Lei
It isn't necessary to check the host depth in scsi_queue_rq() any more
since it has been respected by blk-mq before calling scsi_queue_rq() via
getting driver tag.

Lots of LUNs may attach to the same host, and per-host IOPS may reach the
millions level, so we should avoid these expensive atomic operations on the
host-wide counter in the IO path.

This patch implements scsi_host_busy() via blk_mq_tagset_busy_iter() for
reading the count of busy IOs for scsi_mq.

It is observed that IOPS is increased by 15% in IO test on scsi_debug
(32 LUNs, 32 submit queues, 1024 can_queue, libaio/dio) in one
dual-socket system.

Cc: Omar Sandoval ,
Cc: "Martin K. Petersen" ,
Cc: James Bottomley ,
Cc: Christoph Hellwig ,
Cc: Don Brace 
Cc: Kashyap Desai 
Cc: Mike Snitzer 
Cc: Hannes Reinecke 
Cc: Laurence Oberman 
Signed-off-by: Ming Lei 
---
 drivers/scsi/hosts.c| 24 +++-
 drivers/scsi/scsi_lib.c | 23 +--
 2 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index 69beb30205f1..ad56e2b10ac8 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -564,13 +564,35 @@ struct Scsi_Host *scsi_host_get(struct Scsi_Host *shost)
 }
 EXPORT_SYMBOL(scsi_host_get);
 
+struct scsi_host_mq_in_flight {
+   int cnt;
+};
+
+static void scsi_host_check_in_flight(struct request *rq, void *data,
+   bool reserved)
+{
+   struct scsi_host_mq_in_flight *in_flight = data;
+
+   if (blk_mq_request_started(rq))
+   in_flight->cnt++;
+}
+
 /**
  * scsi_host_busy - Return the host busy counter
  * @shost: Pointer to Scsi_Host to inc.
  **/
 int scsi_host_busy(struct Scsi_Host *shost)
 {
-   return atomic_read(&shost->host_busy);
+   struct scsi_host_mq_in_flight in_flight = {
+   .cnt = 0,
+   };
+
+   if (!shost->use_blk_mq)
+   return atomic_read(&shost->host_busy);
+
+   blk_mq_tagset_busy_iter(&shost->tag_set, scsi_host_check_in_flight,
+   &in_flight);
+   return in_flight.cnt;
 }
 EXPORT_SYMBOL(scsi_host_busy);
 
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 0dfec0dedd5e..dc437c642934 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -345,7 +345,8 @@ static void scsi_dec_host_busy(struct Scsi_Host *shost)
unsigned long flags;
 
rcu_read_lock();
-   atomic_dec(&shost->host_busy);
+   if (!shost->use_blk_mq)
+   atomic_dec(&shost->host_busy);
if (unlikely(scsi_host_in_recovery(shost))) {
spin_lock_irqsave(shost->host_lock, flags);
if (shost->host_failed || shost->host_eh_scheduled)
@@ -444,7 +445,12 @@ static inline bool scsi_target_is_busy(struct scsi_target 
*starget)
 
 static inline bool scsi_host_is_busy(struct Scsi_Host *shost)
 {
-   if (shost->can_queue > 0 &&
+   /*
+* blk-mq can handle host queue busy efficiently via host-wide driver
+* tag allocation
+*/
+
+   if (!shost->use_blk_mq && shost->can_queue > 0 &&
atomic_read(&shost->host_busy) >= shost->can_queue)
return true;
if (atomic_read(&shost->host_blocked) > 0)
@@ -1539,9 +1545,12 @@ static inline int scsi_host_queue_ready(struct 
request_queue *q,
if (scsi_host_in_recovery(shost))
return 0;
 
-   busy = atomic_inc_return(&shost->host_busy) - 1;
+   if (!shost->use_blk_mq)
+   busy = atomic_inc_return(&shost->host_busy) - 1;
+   else
+   busy = 0;
if (atomic_read(&shost->host_blocked) > 0) {
-   if (busy)
+   if (busy || scsi_host_busy(shost))
goto starved;
 
/*
@@ -1555,7 +1564,7 @@ static inline int scsi_host_queue_ready(struct 
request_queue *q,
 "unblocking host at zero depth\n"));
}
 
-   if (shost->can_queue > 0 && busy >= shost->can_queue)
+   if (!shost->use_blk_mq && shost->can_queue > 0 && busy >= 
shost->can_queue)
goto starved;
if (shost->host_self_blocked)
goto starved;
@@ -1641,7 +1650,9 @@ static void scsi_kill_request(struct request *req, struct 
request_queue *q)
 * with the locks as normal issue path does.
 */
atomic_inc(&sdev->device_busy);
-   atomic_inc(&shost->host_busy);
+
+   if (!shost->use_blk_mq)
+   atomic_inc(&shost->host_busy);
if (starget->can_queue > 0)
atomic_inc(&starget->target_busy);
 
-- 
2.9.5