Re: [PATCH V2 8/8] scsi: hpsa: use blk_mq to solve irq affinity issue

2018-02-06 Thread Ming Lei
On Tue, Feb 06, 2018 at 09:39:26AM +0100, Hannes Reinecke wrote:
> On 02/05/2018 04:20 PM, Ming Lei wrote:
> > This patch uses .force_blk_mq to drive HPSA via SCSI_MQ, and maps each
> > reply queue to a blk_mq hw queue, so that .queuecommand can always
> > choose the hw queue as the reply queue. If no online CPU is mapped
> > to a hw queue, no request can be submitted to that hw queue at all,
> > which finally solves the irq affinity issue.
> > 
> > Cc: Hannes Reinecke 
> > Cc: Arun Easi 
> > Cc: Omar Sandoval ,
> > Cc: "Martin K. Petersen" ,
> > Cc: James Bottomley ,
> > Cc: Christoph Hellwig ,
> > Cc: Don Brace 
> > Cc: Kashyap Desai 
> > Cc: Peter Rivera 
> > Cc: Paolo Bonzini 
> > Cc: Mike Snitzer 
> > Tested-by: Laurence Oberman 
> > Signed-off-by: Ming Lei 
> > ---
> >  drivers/scsi/hpsa.c | 51 ++++++++++++++++++++++++++++++++++-----------------
> >  1 file changed, 34 insertions(+), 17 deletions(-)
> > 
> > diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
> > index 443eabf63a9f..e517a4c74a28 100644
> > --- a/drivers/scsi/hpsa.c
> > +++ b/drivers/scsi/hpsa.c
> > @@ -51,6 +51,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include <linux/blk-mq-pci.h>
> >  #include 
> >  #include 
> >  #include "hpsa_cmd.h"
> > @@ -956,6 +957,13 @@ static struct device_attribute *hpsa_shost_attrs[] = {
> >  #define HPSA_NRESERVED_CMDS	(HPSA_CMDS_RESERVED_FOR_DRIVER +\
> > 				HPSA_MAX_CONCURRENT_PASSTHRUS)
> >  
> > +static int hpsa_map_queues(struct Scsi_Host *shost)
> > +{
> > +	struct ctlr_info *h = shost_to_hba(shost);
> > +
> > +	return blk_mq_pci_map_queues(&shost->tag_set, h->pdev);
> > +}
> > +
> >  static struct scsi_host_template hpsa_driver_template = {
> > .module = THIS_MODULE,
> > .name   = HPSA,
> > @@ -974,10 +982,13 @@ static struct scsi_host_template hpsa_driver_template = {
> >  #ifdef CONFIG_COMPAT
> > .compat_ioctl   = hpsa_compat_ioctl,
> >  #endif
> > +   .map_queues = hpsa_map_queues,
> > .sdev_attrs = hpsa_sdev_attrs,
> > .shost_attrs = hpsa_shost_attrs,
> > .max_sectors = 1024,
> > .no_write_same = 1,
> > +   .force_blk_mq = 1,
> > +   .host_tagset = 1,
> >  };
> >  
> >  static inline u32 next_command(struct ctlr_info *h, u8 q)
> > @@ -1045,11 +1056,7 @@ static void set_performant_mode(struct ctlr_info *h, struct CommandList *c,
> > c->busaddr |= 1 | (h->blockFetchTable[c->Header.SGList] << 1);
> > if (unlikely(!h->msix_vectors))
> > return;
> > -   if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
> > -   c->Header.ReplyQueue =
> > -   raw_smp_processor_id() % h->nreply_queues;
> > -   else
> > -   c->Header.ReplyQueue = reply_queue % h->nreply_queues;
> > +   c->Header.ReplyQueue = reply_queue;
> > }
> >  }
> >  
> > @@ -1063,10 +1070,7 @@ static void set_ioaccel1_performant_mode(struct ctlr_info *h,
> >  * Tell the controller to post the reply to the queue for this
> >  * processor.  This seems to give the best I/O throughput.
> >  */
> > -   if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
> > -   cp->ReplyQueue = smp_processor_id() % h->nreply_queues;
> > -   else
> > -   cp->ReplyQueue = reply_queue % h->nreply_queues;
> > +   cp->ReplyQueue = reply_queue;
> > /*
> >  * Set the bits in the address sent down to include:
> >  *  - performant mode bit (bit 0)
> > @@ -1087,10 +1091,7 @@ static void set_ioaccel2_tmf_performant_mode(struct ctlr_info *h,
> > /* Tell the controller to post the reply to the queue for this
> >  * processor.  This seems to give the best I/O throughput.
> >  */
> > -   if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
> > -   cp->reply_queue = smp_processor_id() % h->nreply_queues;
> > -   else
> > -   cp->reply_queue = reply_queue % h->nreply_queues;
> > +   cp->reply_queue = reply_queue;
> > /* Set the bits in the address sent down to include:
> >  *  - performant mode bit not used in ioaccel mode 2
> >  *  - pull count (bits 0-3)
> > @@ -1109,10 +1110,7 @@ static void set_ioaccel2_performant_mode(struct ctlr_info *h,
> >  * Tell the controller to post the reply to the queue for this
> >  * processor.  This seems to give the best I/O throughput.
> >  */
> > -   if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
> > -   cp->reply_queue = smp_processor_id() % h->nreply_queues;
> > -   else
> > -   cp->reply_queue = reply_queue % h->nreply_queues;
> > +   cp->reply_queue = reply_queue;
> > /*
> >  * Set the 
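
The quoted hunk ends here; the remainder of the patch is where the submission
path actually picks the hw queue per request. A minimal sketch of that idea,
using the stock blk-mq tag helpers rather than the literal patch code (the
helper name hpsa_cmd_to_reply_queue is illustrative; only blk_mq_unique_tag()
and blk_mq_unique_tag_to_hwq() are real kernel APIs):

#include <linux/blk-mq.h>
#include <scsi/scsi_cmnd.h>

/* Illustrative helper: blk_mq_unique_tag() encodes the hw queue index
 * in the upper bits of the tag, and blk_mq_unique_tag_to_hwq() extracts
 * it again, so the reply queue can be derived per request. */
static u32 hpsa_cmd_to_reply_queue(struct scsi_cmnd *scmd)
{
	u32 unique_tag = blk_mq_unique_tag(scmd->request);

	/* One reply queue per blk-mq hw queue. */
	return blk_mq_unique_tag_to_hwq(unique_tag);
}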

Re: [PATCH V2 8/8] scsi: hpsa: use blk_mq to solve irq affinity issue

2018-02-06 Thread Hannes Reinecke
On 02/05/2018 04:20 PM, Ming Lei wrote:
> This patch uses .force_blk_mq to drive HPSA via SCSI_MQ, and maps each
> reply queue to a blk_mq hw queue, so that .queuecommand can always
> choose the hw queue as the reply queue. If no online CPU is mapped
> to a hw queue, no request can be submitted to that hw queue at all,
> which finally solves the irq affinity issue.
> 
> Cc: Hannes Reinecke 
> Cc: Arun Easi 
> Cc: Omar Sandoval ,
> Cc: "Martin K. Petersen" ,
> Cc: James Bottomley ,
> Cc: Christoph Hellwig ,
> Cc: Don Brace 
> Cc: Kashyap Desai 
> Cc: Peter Rivera 
> Cc: Paolo Bonzini 
> Cc: Mike Snitzer 
> Tested-by: Laurence Oberman 
> Signed-off-by: Ming Lei 
> ---
>  drivers/scsi/hpsa.c | 51 ++++++++++++++++++++++++++++++++++-----------------
>  1 file changed, 34 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
> index 443eabf63a9f..e517a4c74a28 100644
> --- a/drivers/scsi/hpsa.c
> +++ b/drivers/scsi/hpsa.c
> @@ -51,6 +51,7 @@
>  #include 
>  #include 
>  #include 
> +#include <linux/blk-mq-pci.h>
>  #include 
>  #include 
>  #include "hpsa_cmd.h"
> @@ -956,6 +957,13 @@ static struct device_attribute *hpsa_shost_attrs[] = {
>  #define HPSA_NRESERVED_CMDS  (HPSA_CMDS_RESERVED_FOR_DRIVER +\
>HPSA_MAX_CONCURRENT_PASSTHRUS)
>  
> +static int hpsa_map_queues(struct Scsi_Host *shost)
> +{
> +	struct ctlr_info *h = shost_to_hba(shost);
> +
> +	return blk_mq_pci_map_queues(&shost->tag_set, h->pdev);
> +}
> +
>  static struct scsi_host_template hpsa_driver_template = {
>   .module = THIS_MODULE,
>   .name   = HPSA,
> @@ -974,10 +982,13 @@ static struct scsi_host_template hpsa_driver_template = {
>  #ifdef CONFIG_COMPAT
>   .compat_ioctl   = hpsa_compat_ioctl,
>  #endif
> + .map_queues = hpsa_map_queues,
>   .sdev_attrs = hpsa_sdev_attrs,
>   .shost_attrs = hpsa_shost_attrs,
>   .max_sectors = 1024,
>   .no_write_same = 1,
> + .force_blk_mq = 1,
> + .host_tagset = 1,
>  };
>  
>  static inline u32 next_command(struct ctlr_info *h, u8 q)
> @@ -1045,11 +1056,7 @@ static void set_performant_mode(struct ctlr_info *h, struct CommandList *c,
>   c->busaddr |= 1 | (h->blockFetchTable[c->Header.SGList] << 1);
>   if (unlikely(!h->msix_vectors))
>   return;
> - if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
> - c->Header.ReplyQueue =
> - raw_smp_processor_id() % h->nreply_queues;
> - else
> - c->Header.ReplyQueue = reply_queue % h->nreply_queues;
> + c->Header.ReplyQueue = reply_queue;
>   }
>  }
>  
> @@ -1063,10 +1070,7 @@ static void set_ioaccel1_performant_mode(struct ctlr_info *h,
>* Tell the controller to post the reply to the queue for this
>* processor.  This seems to give the best I/O throughput.
>*/
> - if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
> - cp->ReplyQueue = smp_processor_id() % h->nreply_queues;
> - else
> - cp->ReplyQueue = reply_queue % h->nreply_queues;
> + cp->ReplyQueue = reply_queue;
>   /*
>* Set the bits in the address sent down to include:
>*  - performant mode bit (bit 0)
> @@ -1087,10 +1091,7 @@ static void set_ioaccel2_tmf_performant_mode(struct ctlr_info *h,
>   /* Tell the controller to post the reply to the queue for this
>* processor.  This seems to give the best I/O throughput.
>*/
> - if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
> - cp->reply_queue = smp_processor_id() % h->nreply_queues;
> - else
> - cp->reply_queue = reply_queue % h->nreply_queues;
> + cp->reply_queue = reply_queue;
>   /* Set the bits in the address sent down to include:
>*  - performant mode bit not used in ioaccel mode 2
>*  - pull count (bits 0-3)
> @@ -1109,10 +1110,7 @@ static void set_ioaccel2_performant_mode(struct ctlr_info *h,
>* Tell the controller to post the reply to the queue for this
>* processor.  This seems to give the best I/O throughput.
>*/
> - if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
> - cp->reply_queue = smp_processor_id() % h->nreply_queues;
> - else
> - cp->reply_queue = reply_queue % h->nreply_queues;
> + cp->reply_queue = reply_queue;
>   /*
>* Set the bits in the address sent down to include:
>*  - performant mode bit not used in ioaccel mode 2
> @@ -1152,11 +1150,27 @@ static void dial_up_lockup_detection_on_fw_flash_complete(struct 

Re: [PATCH V2 8/8] scsi: hpsa: use blk_mq to solve irq affinity issue

2018-02-06 Thread Ming Lei
Hello chenxiang,

On Tue, Feb 06, 2018 at 10:18:19AM +0800, chenxiang (M) wrote:
> On 2018/2/5 23:20, Ming Lei wrote:
> > This patch uses .force_blk_mq to drive HPSA via SCSI_MQ, and maps each
> > reply queue to a blk_mq hw queue, so that .queuecommand can always
> > choose the hw queue as the reply queue. If no online CPU is mapped
> > to a hw queue, no request can be submitted to that hw queue at all,
> > which finally solves the irq affinity issue.
> > 
> > Cc: Hannes Reinecke 
> > Cc: Arun Easi 
> > Cc: Omar Sandoval ,
> > Cc: "Martin K. Petersen" ,
> > Cc: James Bottomley ,
> > Cc: Christoph Hellwig ,
> > Cc: Don Brace 
> > Cc: Kashyap Desai 
> > Cc: Peter Rivera 
> > Cc: Paolo Bonzini 
> > Cc: Mike Snitzer 
> > Tested-by: Laurence Oberman 
> > Signed-off-by: Ming Lei 
> > ---
> >   drivers/scsi/hpsa.c | 51 ++++++++++++++++++++++++++++++++++-----------------
> >   1 file changed, 34 insertions(+), 17 deletions(-)
> > 
> > diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
> > index 443eabf63a9f..e517a4c74a28 100644
> > --- a/drivers/scsi/hpsa.c
> > +++ b/drivers/scsi/hpsa.c
> > @@ -51,6 +51,7 @@
> >   #include 
> >   #include 
> >   #include 
> > +#include <linux/blk-mq-pci.h>
> >   #include 
> >   #include 
> >   #include "hpsa_cmd.h"
> > @@ -956,6 +957,13 @@ static struct device_attribute *hpsa_shost_attrs[] = {
> >   #define HPSA_NRESERVED_CMDS   (HPSA_CMDS_RESERVED_FOR_DRIVER +\
> >  HPSA_MAX_CONCURRENT_PASSTHRUS)
> > +static int hpsa_map_queues(struct Scsi_Host *shost)
> > +{
> > +	struct ctlr_info *h = shost_to_hba(shost);
> > +
> > +	return blk_mq_pci_map_queues(&shost->tag_set, h->pdev);
> > +}
> > +
> 
> Hi Lei Ming,
> It is okay to use blk_mq_pci_map_queues to solve the automatic irq affinity
> issue when the first interrupt vector for the queues is 0.
> But if the first interrupt vector for the queues is not 0, it seems we
> can't use blk_mq_pci_map_queues directly; blk_mq_virtio_map_queues, for
> example, implements its own interface. Is it possible to provide a
> general interface for those situations?

I guess it isn't necessary to do that: .map_queues has been introduced
to 'scsi_host_template' exactly for dealing with driver-specific irq
vector differences. For virtio-pci, for example, 'irq_affinity' is
needed to exclude the 'pre_vectors', which serve as the virtio config
vector.

But that belongs to another topic, implementing a generic .map_queues
interface, and it isn't related to this patch, since the usage of
blk_mq_pci_map_queues() in this patch is correct.
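
As a concrete reference, virtio_scsi's own .map_queues shows the
driver-specific handling: it skips the two pre_vectors (control and event
virtqueues) before spreading the request-queue vectors. A minimal sketch of
that callback, assuming the ~4.15 blk_mq_virtio_map_queues() helper (the
VIRTSCSI_PRE_VECTORS name is illustrative; struct virtio_scsi is private to
virtio_scsi.c):

#include <linux/blk-mq-virtio.h>
#include <scsi/scsi_host.h>

/* Illustrative name: virtio-scsi reserves vectors 0 and 1 for the
 * control and event virtqueues, so request-queue vectors start at 2. */
#define VIRTSCSI_PRE_VECTORS	2

static int virtscsi_map_queues(struct Scsi_Host *shost)
{
	struct virtio_scsi *vscsi = shost_priv(shost);

	/* Map hw queues using the affinity of vector (2 + queue index),
	 * rather than vector == queue index as blk_mq_pci_map_queues()
	 * assumes in this kernel. */
	return blk_mq_virtio_map_queues(&shost->tag_set, vscsi->vdev,
					VIRTSCSI_PRE_VECTORS);
}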

Thanks,
Ming


Re: [PATCH V2 8/8] scsi: hpsa: use blk_mq to solve irq affinity issue

2018-02-05 Thread chenxiang (M)

On 2018/2/5 23:20, Ming Lei wrote:

This patch uses .force_blk_mq to drive HPSA via SCSI_MQ, and maps each
reply queue to a blk_mq hw queue, so that .queuecommand can always
choose the hw queue as the reply queue. If no online CPU is mapped
to a hw queue, no request can be submitted to that hw queue at all,
which finally solves the irq affinity issue.

Cc: Hannes Reinecke 
Cc: Arun Easi 
Cc: Omar Sandoval ,
Cc: "Martin K. Petersen" ,
Cc: James Bottomley ,
Cc: Christoph Hellwig ,
Cc: Don Brace 
Cc: Kashyap Desai 
Cc: Peter Rivera 
Cc: Paolo Bonzini 
Cc: Mike Snitzer 
Tested-by: Laurence Oberman 
Signed-off-by: Ming Lei 
---
  drivers/scsi/hpsa.c | 51 ++++++++++++++++++++++++++++++++++-----------------
  1 file changed, 34 insertions(+), 17 deletions(-)

diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 443eabf63a9f..e517a4c74a28 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -51,6 +51,7 @@
  #include 
  #include 
  #include 
+#include <linux/blk-mq-pci.h>
  #include 
  #include 
  #include "hpsa_cmd.h"
@@ -956,6 +957,13 @@ static struct device_attribute *hpsa_shost_attrs[] = {
  #define HPSA_NRESERVED_CMDS   (HPSA_CMDS_RESERVED_FOR_DRIVER +\
 HPSA_MAX_CONCURRENT_PASSTHRUS)
  
+static int hpsa_map_queues(struct Scsi_Host *shost)
+{
+	struct ctlr_info *h = shost_to_hba(shost);
+
+	return blk_mq_pci_map_queues(&shost->tag_set, h->pdev);
+}
+


Hi Lei Ming,
It is okay to use blk_mq_pci_map_queues to solve the automatic irq affinity
issue when the first interrupt vector for the queues is 0.
But if the first interrupt vector for the queues is not 0, it seems we
can't use blk_mq_pci_map_queues directly; blk_mq_virtio_map_queues, for
example, implements its own interface. Is it possible to provide a general
interface for those situations?
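
For reference, the non-zero first vector typically comes from reserving
'pre_vectors' when allocating MSI-X vectors, so vector 0 serves something
other than a queue and is excluded from the affinity spread. A hedged sketch
of how that situation arises (the function name is illustrative; the PCI APIs
are the stock ~4.15 ones):

#include <linux/interrupt.h>
#include <linux/pci.h>

/* Reserve vector 0 for non-queue work (e.g. a config/admin interrupt);
 * only the remaining vectors are spread across CPUs for the queues. */
static int example_setup_irqs(struct pci_dev *pdev, int nr_queues)
{
	struct irq_affinity affd = {
		.pre_vectors = 1,	/* queue vectors start at 1, not 0 */
	};

	return pci_alloc_irq_vectors_affinity(pdev, 2, 1 + nr_queues,
					      PCI_IRQ_MSIX | PCI_IRQ_AFFINITY,
					      &affd);
}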



  static struct scsi_host_template hpsa_driver_template = {
.module = THIS_MODULE,
.name   = HPSA,
@@ -974,10 +982,13 @@ static struct scsi_host_template hpsa_driver_template = {
  #ifdef CONFIG_COMPAT
.compat_ioctl   = hpsa_compat_ioctl,
  #endif
+   .map_queues = hpsa_map_queues,
.sdev_attrs = hpsa_sdev_attrs,
.shost_attrs = hpsa_shost_attrs,
.max_sectors = 1024,
.no_write_same = 1,
+   .force_blk_mq = 1,
+   .host_tagset = 1,
  };
  
  static inline u32 next_command(struct ctlr_info *h, u8 q)

@@ -1045,11 +1056,7 @@ static void set_performant_mode(struct ctlr_info *h, struct CommandList *c,
c->busaddr |= 1 | (h->blockFetchTable[c->Header.SGList] << 1);
if (unlikely(!h->msix_vectors))
return;
-   if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
-   c->Header.ReplyQueue =
-   raw_smp_processor_id() % h->nreply_queues;
-   else
-   c->Header.ReplyQueue = reply_queue % h->nreply_queues;
+   c->Header.ReplyQueue = reply_queue;
}
  }
  
@@ -1063,10 +1070,7 @@ static void set_ioaccel1_performant_mode(struct ctlr_info *h,

 * Tell the controller to post the reply to the queue for this
 * processor.  This seems to give the best I/O throughput.
 */
-   if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
-   cp->ReplyQueue = smp_processor_id() % h->nreply_queues;
-   else
-   cp->ReplyQueue = reply_queue % h->nreply_queues;
+   cp->ReplyQueue = reply_queue;
/*
 * Set the bits in the address sent down to include:
 *  - performant mode bit (bit 0)
@@ -1087,10 +1091,7 @@ static void set_ioaccel2_tmf_performant_mode(struct ctlr_info *h,
/* Tell the controller to post the reply to the queue for this
 * processor.  This seems to give the best I/O throughput.
 */
-   if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
-   cp->reply_queue = smp_processor_id() % h->nreply_queues;
-   else
-   cp->reply_queue = reply_queue % h->nreply_queues;
+   cp->reply_queue = reply_queue;
/* Set the bits in the address sent down to include:
 *  - performant mode bit not used in ioaccel mode 2
 *  - pull count (bits 0-3)
@@ -1109,10 +1110,7 @@ static void set_ioaccel2_performant_mode(struct ctlr_info *h,
 * Tell the controller to post the reply to the queue for this
 * processor.  This seems to give the best I/O throughput.
 */
-   if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
-   cp->reply_queue = smp_processor_id() % h->nreply_queues;
-   else
-   cp->reply_queue = reply_queue % 

RE: [PATCH V2 8/8] scsi: hpsa: use blk_mq to solve irq affinity issue

2018-02-05 Thread Don Brace
> -Original Message-
> From: Laurence Oberman [mailto:lober...@redhat.com]
> Sent: Monday, February 05, 2018 9:58 AM
> To: Ming Lei <ming@redhat.com>; Jens Axboe <ax...@kernel.dk>; linux-
> bl...@vger.kernel.org; Christoph Hellwig <h...@infradead.org>; Mike Snitzer
> <snit...@redhat.com>; Don Brace <don.br...@microsemi.com>
> Cc: linux-s...@vger.kernel.org; Hannes Reinecke <h...@suse.de>; Arun Easi
> <arun.e...@cavium.com>; Omar Sandoval <osan...@fb.com>; Martin K .
> Petersen <martin.peter...@oracle.com>; James Bottomley
> <james.bottom...@hansenpartnership.com>; Christoph Hellwig <h...@lst.de>;
> Don Brace <don.br...@microsemi.com>; Kashyap Desai
> <kashyap.de...@broadcom.com>; Peter Rivera <peter.riv...@broadcom.com>;
> Paolo Bonzini <pbonz...@redhat.com>
> Subject: Re: [PATCH V2 8/8] scsi: hpsa: use blk_mq to solve irq affinity issue
> 
> On Mon, 2018-02-05 at 23:20 +0800, Ming Lei wrote:
> > This patch uses .force_blk_mq to drive HPSA via SCSI_MQ, and maps each
> > reply queue to a blk_mq hw queue, so that .queuecommand can always
> > choose the hw queue as the reply queue. If no online CPU is mapped
> > to a hw queue, no request can be submitted to that hw queue at all,
> > which finally solves the irq affinity issue.
> >
> > Cc: Hannes Reinecke <h...@suse.de>
> > Cc: Arun Easi <arun.e...@cavium.com>
> > Cc: Omar Sandoval <osan...@fb.com>,
> > Cc: "Martin K. Petersen" <martin.peter...@oracle.com>,
> > Cc: James Bottomley <james.bottom...@hansenpartnership.com>,
> > Cc: Christoph Hellwig <h...@lst.de>,
> > Cc: Don Brace <don.br...@microsemi.com>
> > Cc: Kashyap Desai <kashyap.de...@broadcom.com>
> > Cc: Peter Rivera <peter.riv...@broadcom.com>
> > Cc: Paolo Bonzini <pbonz...@redhat.com>
> > Cc: Mike Snitzer <snit...@redhat.com>
> > Tested-by: Laurence Oberman <lober...@redhat.com>
> > Signed-off-by: Ming Lei <ming@redhat.com>
> > ---
> >  drivers/scsi/hpsa.c | 51 ++++++++++++++++++++++++++++++++++-----------------
> This is a critical issue for HPSA because Linus's tree already has the
> original commit that causes the system to fail to boot.
> 
> All my testing was on DL380 G7 servers with:
> 
> Hewlett-Packard Company Smart Array G6 controllers
> Vendor: HP   Model: P410i   Rev: 6.64
> 
> Ming's patch fixes this, so we need to try to move this along.
> 
> I have a DL380 G8 as well, which is likely also exposed here, so I added
> Don Brace to this list as an FYI.
> 
> Thanks Ming

Tested-by: Don Brace <don.br...@microsemi.com>
P441, P431, P830i, H240

Acked-by: Don Brace <don.br...@microsemi.com>





RE: [PATCH V2 8/8] scsi: hpsa: use blk_mq to solve irq affinity issue

2018-02-05 Thread Don Brace
> -Original Message-
> This is a critical issue for HPSA because Linus's tree already has the
> original commit that causes the system to fail to boot.
> 
> All my testing was on DL380 G7 servers with:
> 
> Hewlett-Packard Company Smart Array G6 controllers
> Vendor: HP   Model: P410i   Rev: 6.64
> 
> Ming's patch fixes this, so we need to try to move this along.
> 
> I have a DL380 G8 as well, which is likely also exposed here, so I added
> Don Brace to this list as an FYI.
> 
> Thanks Ming

Running some tests now.


Re: [PATCH V2 8/8] scsi: hpsa: use blk_mq to solve irq affinity issue

2018-02-05 Thread Laurence Oberman
On Mon, 2018-02-05 at 23:20 +0800, Ming Lei wrote:
> This patch uses .force_blk_mq to drive HPSA via SCSI_MQ, and maps each
> reply queue to a blk_mq hw queue, so that .queuecommand can always
> choose the hw queue as the reply queue. If no online CPU is mapped
> to a hw queue, no request can be submitted to that hw queue at all,
> which finally solves the irq affinity issue.
> 
> Cc: Hannes Reinecke 
> Cc: Arun Easi 
> Cc: Omar Sandoval ,
> Cc: "Martin K. Petersen" ,
> Cc: James Bottomley ,
> Cc: Christoph Hellwig ,
> Cc: Don Brace 
> Cc: Kashyap Desai 
> Cc: Peter Rivera 
> Cc: Paolo Bonzini 
> Cc: Mike Snitzer 
> Tested-by: Laurence Oberman 
> Signed-off-by: Ming Lei 
> ---
>  drivers/scsi/hpsa.c | 51 ++++++++++++++++++++++++++++++++++-----------------
> 
>  1 file changed, 34 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
> index 443eabf63a9f..e517a4c74a28 100644
> --- a/drivers/scsi/hpsa.c
> +++ b/drivers/scsi/hpsa.c
> @@ -51,6 +51,7 @@
>  #include 
>  #include 
>  #include 
> +#include <linux/blk-mq-pci.h>
>  #include 
>  #include 
>  #include "hpsa_cmd.h"
> @@ -956,6 +957,13 @@ static struct device_attribute *hpsa_shost_attrs[] = {
>  #define HPSA_NRESERVED_CMDS	(HPSA_CMDS_RESERVED_FOR_DRIVER +\
> 				HPSA_MAX_CONCURRENT_PASSTHRUS)
>  
> +static int hpsa_map_queues(struct Scsi_Host *shost)
> +{
> +	struct ctlr_info *h = shost_to_hba(shost);
> +
> +	return blk_mq_pci_map_queues(&shost->tag_set, h->pdev);
> +}
> +
>  static struct scsi_host_template hpsa_driver_template = {
>   .module = THIS_MODULE,
>   .name   = HPSA,
> @@ -974,10 +982,13 @@ static struct scsi_host_template hpsa_driver_template = {
>  #ifdef CONFIG_COMPAT
>   .compat_ioctl   = hpsa_compat_ioctl,
>  #endif
> + .map_queues = hpsa_map_queues,
>   .sdev_attrs = hpsa_sdev_attrs,
>   .shost_attrs = hpsa_shost_attrs,
>   .max_sectors = 1024,
>   .no_write_same = 1,
> + .force_blk_mq = 1,
> + .host_tagset = 1,
>  };
>  
>  static inline u32 next_command(struct ctlr_info *h, u8 q)
> @@ -1045,11 +1056,7 @@ static void set_performant_mode(struct ctlr_info *h, struct CommandList *c,
>  		c->busaddr |= 1 | (h->blockFetchTable[c->Header.SGList] << 1);
>  		if (unlikely(!h->msix_vectors))
>  			return;
> -		if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
> -			c->Header.ReplyQueue =
> -				raw_smp_processor_id() % h->nreply_queues;
> -		else
> -			c->Header.ReplyQueue = reply_queue % h->nreply_queues;
> +		c->Header.ReplyQueue = reply_queue;
>  	}
>  }
>  
> @@ -1063,10 +1070,7 @@ static void set_ioaccel1_performant_mode(struct ctlr_info *h,
>  	 * Tell the controller to post the reply to the queue for this
>  	 * processor.  This seems to give the best I/O throughput.
>  	 */
> -	if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
> -		cp->ReplyQueue = smp_processor_id() % h->nreply_queues;
> -	else
> -		cp->ReplyQueue = reply_queue % h->nreply_queues;
> +	cp->ReplyQueue = reply_queue;
>   /*
>    * Set the bits in the address sent down to include:
>    *  - performant mode bit (bit 0)
> @@ -1087,10 +1091,7 @@ static void set_ioaccel2_tmf_performant_mode(struct ctlr_info *h,
>  	/* Tell the controller to post the reply to the queue for this
>  	 * processor.  This seems to give the best I/O throughput.
>  	 */
> -	if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
> -		cp->reply_queue = smp_processor_id() % h->nreply_queues;
> -	else
> -		cp->reply_queue = reply_queue % h->nreply_queues;
> +	cp->reply_queue = reply_queue;
>   /* Set the bits in the address sent down to include:
>    *  - performant mode bit not used in ioaccel mode 2
>    *  - pull count (bits 0-3)
> @@ -1109,10 +1110,7 @@ static void set_ioaccel2_performant_mode(struct ctlr_info *h,
>  	 * Tell the controller to post the reply to the queue for this
>  	 * processor.  This seems to give the best I/O throughput.
>  	 */
> -	if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
> -		cp->reply_queue = smp_processor_id() % h->nreply_queues;
> -	else
> -		cp->reply_queue = reply_queue % h->nreply_queues;
> +	cp->reply_queue = reply_queue;
>   /*
>    * Set the bits in the address sent down to include:
>    *  - performant mode bit not used in ioaccel mode 2
> @@ -1152,11 +1150,27 @@ static void dial_up_lockup_detection_on_fw_flash_complete(struct 