On 12/01/2015 10:02 PM, wenxi...@linux.vnet.ibm.com wrote:
> From: Wen Xiong <wenxi...@linux.vnet.ibm.com>
> 
> Enable MSIX vector mapping to CPU affinity. This feature is
> enabled by setting ipr_cpu_map=1 when loading ipr module.
> 
> Signed-off-by: Wen Xiong <wenxi...@linux.vnet.ibm.com>
> ---
>  drivers/scsi/ipr.c |  220 +++++++++++++++++++++++++++++++++++++++++++++++++++-
>  drivers/scsi/ipr.h |   16 ++++
>  2 files changed, 235 insertions(+), 1 deletions(-)
> 
> diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
> index 9f30887..494c0fc 100644
> --- a/drivers/scsi/ipr.c
> +++ b/drivers/scsi/ipr.c
> @@ -100,6 +100,11 @@ static unsigned int ipr_max_devs = IPR_DEFAULT_SIS64_DEVS;
>  static unsigned int ipr_dual_ioa_raid = 1;
>  static unsigned int ipr_number_of_msix = 16;
>  static unsigned int ipr_fast_reboot;
> +static unsigned int ipr_cpu_map;
> +
> +static unsigned int *ipr_used_cpu;

How about just defining this like:

static unsigned int ipr_used_cpu[NR_CPUS];

Then we can eliminate the kmalloc / kfree of this and simplify this bit of the 
code slightly.
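
Untested, but the one-time setup would then just be an init loop, e.g. in
ipr_init(), with no allocation or failure path to worry about:

	int i;

	/* Mark every slot unused at module load */
	for (i = 0; i < NR_CPUS; i++)
		ipr_used_cpu[i] = IPR_VECTOR_MAP_EMPTY;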

> +static unsigned int ipr_possible_cpu_cnt;
> +
>  static DEFINE_SPINLOCK(ipr_driver_lock);
> 
>  /* This table describes the differences between DMA controller chips */
> @@ -224,6 +229,8 @@ module_param_named(number_of_msix, ipr_number_of_msix, int, 0);
>  MODULE_PARM_DESC(number_of_msix, "Specify the number of MSIX interrupts to use on capable adapters (1 - 16).  (default:16)");
>  module_param_named(fast_reboot, ipr_fast_reboot, int, S_IRUGO | S_IWUSR);
>  MODULE_PARM_DESC(fast_reboot, "Skip adapter shutdown during reboot. Set to 1 to enable. (default: 0)");
> +module_param_named(cpu_map, ipr_cpu_map,  int, 0);
> +MODULE_PARM_DESC(cpu_map, "Enable CPU affinity per adapter. (default:0)");

Can we enable this by default? It would be nice for the default settings to
provide the best performance under normal conditions.
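
If we do, that's a one line change (plus updating the MODULE_PARM_DESC text):

	static unsigned int ipr_cpu_map = 1;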

>  MODULE_LICENSE("GPL");
>  MODULE_VERSION(IPR_DRIVER_VERSION);
> 
> @@ -1053,6 +1060,17 @@ static void ipr_send_blocking_cmd(struct ipr_cmnd *ipr_cmd,
>  static int ipr_get_hrrq_index(struct ipr_ioa_cfg *ioa_cfg)
>  {
>       unsigned int hrrq;
> +     struct ipr_vector_map_info *map_per_cpu;
> +     u32 cpu;
> +
> +     if (ioa_cfg->cpu_map && ioa_cfg->hrrq_num > 1) {
> +             cpu = raw_smp_processor_id();

Not sure this is what we want here. I think what we want to do is to set the
irq affinity to be any thread on the core: you'd find the first available CPU,
then use cpu_sibling_mask to figure out all the CPUs on the core, which is what
you'd use to set the affinity. Then in here, we'd just use smp_processor_id()
and find the HRRQ that it maps to.

I'm OK leaving this as a follow-on optimization though, since I'd like to get
this patch moving.
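
For reference, what I have in mind is roughly the following, completely
untested. The assignment loop would also need to fill in hrrq_id for each
sibling CPU on the core, but this is the general shape:

	/* When assigning vector i, hint the irq at every thread on the
	 * core rather than at a single CPU.
	 */
	irq_set_affinity_hint(ioa_cfg->vectors_info[i].vec,
			      cpu_sibling_mask(next_cpu_id));

	/* Then ipr_get_hrrq_index just looks up the current CPU; every
	 * thread on the core resolves to the same HRRQ via the table.
	 */
	cpu = smp_processor_id();
	if (cpu < ioa_cfg->possible_cpu_cnt)
		return ioa_cfg->cpu_map_tbl[cpu].hrrq_id;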


> +             map_per_cpu = ioa_cfg->cpu_map_tbl;
> +             if (cpu < ioa_cfg->possible_cpu_cnt) {
> +                     map_per_cpu += cpu;
> +                     return map_per_cpu->hrrq_id;
> +             }
> +     }
> 
>       if (ioa_cfg->hrrq_num == 1)
>               hrrq = 0;
> @@ -4086,6 +4104,77 @@ static struct device_attribute ipr_ioa_fw_type_attr = {
>       .show = ipr_show_fw_type
>  };
> 
> +static ssize_t
> +ipr_show_cpu_map_tbl(struct device *dev, struct device_attribute *attr,
> +                     char *buf)
> +{
> +     struct Scsi_Host *shost = class_to_shost(dev);
> +     struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata;
> +     struct ipr_vector_map_info *map_per_cpu;
> +     int  len = 0, show_current_cpu = 0;
> +
> +     if (ioa_cfg->intr_flag != IPR_USE_MSIX)
> +             return len;
> +
> +     switch (ioa_cfg->cpu_map) {
> +     case 0:
> +             len += snprintf(buf + len, PAGE_SIZE - len,
> +                     "ipr driver cpu map: No mapping (%d)\n",
> +                     ioa_cfg->cpu_map);
> +             return len;
> +     case 1:
> +             len += snprintf(buf + len, PAGE_SIZE - len,
> +                     "ipr driver cpu map: IRQ vector to CPU mapping (%d): "
> +                     "%d online CPUs\n",
> +                     ioa_cfg->cpu_map,
> +                     num_online_cpus());
> +             break;
> +     }
> +
> +     while (show_current_cpu < ioa_cfg->possible_cpu_cnt) {
> +             map_per_cpu = &ioa_cfg->cpu_map_tbl[show_current_cpu];
> +
> +             if (map_per_cpu->irq_id == IPR_VECTOR_MAP_EMPTY)
> +                     len += snprintf(buf + len, PAGE_SIZE - len,
> +                                     "CPU %02d io_hrrq %02d\n",
> +                                     show_current_cpu,
> +                                     map_per_cpu->hrrq_id);
> +             else
> +                     len += snprintf(buf + len, PAGE_SIZE - len,
> +                                     "CPU %02d io_hrrq %02d "
> +                                     " IRQ=%d\n",
> +                                     show_current_cpu,
> +                                     map_per_cpu->hrrq_id, map_per_cpu->irq_id);
> +
> +             show_current_cpu++;
> +
> +             if (show_current_cpu < ioa_cfg->possible_cpu_cnt
> +                             && (len >= (PAGE_SIZE - 64))) {
> +                     len += snprintf(buf + len, PAGE_SIZE - len, "more...\n");
> +                     break;
> +             }
> +     }
> +
> +     return len;
> +}
> +
> +static ssize_t
> +ipr_store_cpu_map_tbl(struct device *dev, struct device_attribute *attr,
> +                     const char *buf, size_t count)
> +{
> +     int status = -EINVAL;
> +     return status;
> +}
> +
> +static struct device_attribute ipr_cpu_map_attr = {
> +     .attr = {
> +             .name =         "cpu_map",
> +             .mode =         S_IRUGO | S_IWUSR,

Since we don't support writing the attribute, let's change the mode to just be
S_IRUGO, and then we don't need an ipr_store_cpu_map_tbl function.
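
That is, just:

	static struct device_attribute ipr_cpu_map_attr = {
		.attr = {
			.name =		"cpu_map",
			.mode =		S_IRUGO,
		},
		.show = ipr_show_cpu_map_tbl,
	};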

> +     },
> +     .show = ipr_show_cpu_map_tbl,
> +     .store = ipr_store_cpu_map_tbl
> +};
> +
>  static struct device_attribute *ipr_ioa_attrs[] = {
>       &ipr_fw_version_attr,
>       &ipr_log_level_attr,
> @@ -4095,6 +4184,7 @@ static struct device_attribute *ipr_ioa_attrs[] = {
>       &ipr_update_fw_attr,
>       &ipr_ioa_fw_type_attr,
>       &ipr_iopoll_weight_attr,
> +     &ipr_cpu_map_attr,
>       NULL,
>  };
> 
> @@ -9333,6 +9423,8 @@ static void ipr_free_mem(struct ipr_ioa_cfg *ioa_cfg)
> 
>       ipr_free_dump(ioa_cfg);
>       kfree(ioa_cfg->trace);
> +     if (ioa_cfg->cpu_map)
> +             kfree(ioa_cfg->cpu_map_tbl);

It's valid to call kfree on a NULL pointer, so the if check isn't technically
required.
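
That is, the free path can unconditionally be:

	kfree(ioa_cfg->cpu_map_tbl);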

>  }
> 
>  /**
> @@ -9352,9 +9444,13 @@ static void ipr_free_irqs(struct ipr_ioa_cfg *ioa_cfg)
>       if (ioa_cfg->intr_flag == IPR_USE_MSI ||
>           ioa_cfg->intr_flag == IPR_USE_MSIX) {
>               int i;
> -             for (i = 0; i < ioa_cfg->nvectors; i++)
> +             for (i = 0; i < ioa_cfg->nvectors; i++) {
> +                     if (ioa_cfg->cpu_map)
> +                             irq_set_affinity_hint(
> +                                     ioa_cfg->vectors_info[i].vec, NULL);
>                       free_irq(ioa_cfg->vectors_info[i].vec,
>                                &ioa_cfg->hrrq[i]);
> +             }
>       } else
>               free_irq(pdev->irq, &ioa_cfg->hrrq[0]);
> 
> @@ -9585,11 +9681,35 @@ static int ipr_alloc_mem(struct ipr_ioa_cfg *ioa_cfg)
>       if (!ioa_cfg->trace)
>               goto out_free_hostrcb_dma;
> 
> +     if (ioa_cfg->cpu_map) {
> +             ioa_cfg->cpu_map_tbl =
> +                     kzalloc(sizeof(struct ipr_vector_map_info) *
> +                     ioa_cfg->possible_cpu_cnt, GFP_KERNEL);
> +
> +             if (!ioa_cfg->cpu_map_tbl)
> +                     goto out_free_trace;
> +     }
> +
> +     if (ipr_used_cpu == NULL) {
> +             ipr_used_cpu = kzalloc((sizeof(uint16_t) *
> +                             ipr_possible_cpu_cnt), GFP_KERNEL);
> +             if (!ipr_used_cpu)
> +                     goto out_free_cpu_map_tbl;
> +
> +             for (i = 0; i < ipr_possible_cpu_cnt; i++)
> +                     ipr_used_cpu[i] = IPR_VECTOR_MAP_EMPTY;
> +     }
> +
>       rc = 0;
>  out:
>       LEAVE;
>       return rc;
> 
> +out_free_cpu_map_tbl:
> +     if (ioa_cfg->cpu_map)
> +             kfree(ioa_cfg->cpu_map_tbl);
> +out_free_trace:
> +     kfree(ioa_cfg->trace);
>  out_free_hostrcb_dma:
>       while (i-- > 0) {
>               dma_free_coherent(&pdev->dev, sizeof(struct ipr_hostrcb),
> @@ -9791,6 +9911,92 @@ static void ipr_wait_for_pci_err_recovery(struct ipr_ioa_cfg *ioa_cfg)
>       }
>  }
> 
> +static int
> +ipr_find_next_free_cpu(struct ipr_ioa_cfg *ioa_cfg, u32 cpu_id)
> +{
> +     struct ipr_vector_map_info *map_per_cpu;
> +     int i;
> +
> +     map_per_cpu = ioa_cfg->cpu_map_tbl;
> +     for (i = 0; i < ioa_cfg->possible_cpu_cnt; i++) {
> +             if (cpu_online(i)) {
> +                     if ((map_per_cpu->irq_id == IPR_VECTOR_MAP_EMPTY) &&
> +                             (ipr_used_cpu[i] == IPR_VECTOR_MAP_EMPTY) &&
> +                             (map_per_cpu->cpu_id == cpu_id))
> +                                     return i;
> +             }
> +             map_per_cpu++;
> +     }
> +
> +     return IPR_VECTOR_MAP_EMPTY;
> +}
> +
> +static int ipr_set_cpu_affinity_for_hrrq(struct ipr_ioa_cfg *ioa_cfg,
> +                                     int vectors)
> +{
> +     struct ipr_vector_map_info *map_per_cpu;
> +     int cpu_idx = 0, hrrq_num, next_cpu_id, i;
> +
> +     memset(ioa_cfg->cpu_map_tbl, 0xff,
> +                     (sizeof(struct ipr_vector_map_info) *
> +                     ioa_cfg->possible_cpu_cnt));
> +
> +     map_per_cpu = ioa_cfg->cpu_map_tbl;
> +     for (i = 0; i < ioa_cfg->possible_cpu_cnt; i++, map_per_cpu++)
> +             map_per_cpu->cpu_id = i;
> +
> +     for (i = 0; i < ipr_possible_cpu_cnt; i++) {
> +             if (ipr_used_cpu[i] == IPR_VECTOR_MAP_EMPTY) {
> +                     cpu_idx = i;
> +                     break;
> +             }
> +     }
> +
> +     hrrq_num = 0;
> +     for (i = 0; i < vectors; i++) {
> +             map_per_cpu = ioa_cfg->cpu_map_tbl;
> +             next_cpu_id = ipr_find_next_free_cpu(ioa_cfg, cpu_idx);
> +
> +             if (next_cpu_id == IPR_VECTOR_MAP_EMPTY) {
> +                     int j;
> +
> +                     for (j = 0; j < i; j++)
> +                             irq_set_affinity_hint(
> +                                     ioa_cfg->vectors_info[j].vec, NULL);
> +                     ioa_cfg->cpu_map = 0;
> +                     dev_info(&ioa_cfg->pdev->dev, "Cannot set cpu affinity\n");
> +                     return 0;
> +             }
> +
> +             map_per_cpu += next_cpu_id;
> +             ipr_used_cpu[next_cpu_id] = next_cpu_id;
> +
> +             map_per_cpu->irq_id = ioa_cfg->vectors_info[i].vec;
> +             map_per_cpu->hrrq_id = hrrq_num;
> +             irq_set_affinity_hint(ioa_cfg->vectors_info[i].vec,
> +                             get_cpu_mask(next_cpu_id));

Hmm. It looks like you've changed ipr_get_hrrq_index to use
raw_smp_processor_id(), which should return the CPU we are currently
running on.

It would be interesting to see how performance is impacted here if we set the
affinity hint to be every thread on the core, as suggested above.

> +             cpu_idx++;
> +             hrrq_num++;
> +     }
> +
> +     hrrq_num = 0;
> +     if (vectors < ioa_cfg->possible_cpu_cnt) {
> +             map_per_cpu = ioa_cfg->cpu_map_tbl;
> +             for (i = 0; i < ioa_cfg->possible_cpu_cnt; i++) {
> +                     if (map_per_cpu->hrrq_id == IPR_VECTOR_MAP_EMPTY) {
> +                             map_per_cpu->hrrq_id = hrrq_num;
> +                             hrrq_num++;
> +                     }
> +
> +                     if (hrrq_num == ioa_cfg->hrrq_num)
> +                             hrrq_num = 0;
> +                     map_per_cpu++;
> +             }
> +     }
> +
> +     return 1;
> +}
> +
>  static int ipr_enable_msix(struct ipr_ioa_cfg *ioa_cfg)
>  {
>       struct msix_entry entries[IPR_MAX_MSIX_VECTORS];
> @@ -10045,6 +10251,7 @@ static int ipr_probe_ioa(struct pci_dev *pdev,
>       ioa_cfg->hdw_dma_regs = ipr_regs;
>       ioa_cfg->hdw_dma_regs_pci = ipr_regs_pci;
>       ioa_cfg->ioa_mailbox = ioa_cfg->chip_cfg->mailbox + ipr_regs;
> +     ioa_cfg->cpu_map = ipr_cpu_map;
> 
>       ipr_init_regs(ioa_cfg);
> 
> @@ -10106,6 +10313,8 @@ static int ipr_probe_ioa(struct pci_dev *pdev,
>               }
>       }
> 
> +     ioa_cfg->possible_cpu_cnt = ipr_possible_cpu_cnt;
> +
>       if (ioa_cfg->intr_flag == IPR_USE_MSI ||
>           ioa_cfg->intr_flag == IPR_USE_MSIX) {
>               rc = ipr_test_msi(ioa_cfg, pdev);
> @@ -10200,6 +10409,9 @@ static int ipr_probe_ioa(struct pci_dev *pdev,
>               goto cleanup_nolog;
>       }
> 
> +     if (ioa_cfg->cpu_map)
> +             ipr_set_cpu_affinity_for_hrrq(ioa_cfg, ioa_cfg->nvectors);
> +
>       if ((dev_id->driver_data & IPR_USE_PCI_WARM_RESET) ||
>           (dev_id->device == PCI_DEVICE_ID_IBM_OBSIDIAN_E && !ioa_cfg->revid)) {
>               ioa_cfg->needs_warm_reset = 1;
> @@ -10644,9 +10856,14 @@ static struct notifier_block ipr_notifier = {
>   **/
>  static int __init ipr_init(void)
>  {
> +     int cpu;
>       ipr_info("IBM Power RAID SCSI Device Driver version: %s %s\n",
>                IPR_DRIVER_VERSION, IPR_DRIVER_DATE);
> 
> +     ipr_used_cpu = NULL;
> +     for_each_present_cpu(cpu)
> +             ipr_possible_cpu_cnt++;

I don't think this will handle CPU hotplug, but we can look at adding that
support in a follow-on patch.

Also, this could be simplified to just use num_present_cpus(). 
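
That is:

	ipr_possible_cpu_cnt = num_present_cpus();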

> +
>       register_reboot_notifier(&ipr_notifier);
>       return pci_register_driver(&ipr_driver);
>  }
> @@ -10663,6 +10880,7 @@ static void __exit ipr_exit(void)
>  {
>       unregister_reboot_notifier(&ipr_notifier);
>       pci_unregister_driver(&ipr_driver);
> +     kfree(ipr_used_cpu);
>  }
> 
>  module_init(ipr_init);
> diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h
> index a34c7a5..a7ef83f 100644
> --- a/drivers/scsi/ipr.h
> +++ b/drivers/scsi/ipr.h
> @@ -1464,6 +1464,16 @@ enum ipr_sdt_state {
>       DUMP_OBTAINED
>  };
> 
> +#define IPR_VECTOR_MAP_EMPTY         0xffff
> +#define IPR_SET_IO_CPU_AFFINITY              0x1
> +
> +/* IRQ vector to CPU mapping when set CPU affinity*/
> +struct ipr_vector_map_info {
> +     u16 cpu_id;
> +     u16 irq_id;
> +     u16 hrrq_id;
> +};
> +
>  /* Per-controller data */
>  struct ipr_ioa_cfg {
>       char eye_catcher[8];
> @@ -1595,6 +1605,12 @@ struct ipr_ioa_cfg {
> 
>       u32 iopoll_weight;
> 
> +     u32 cpu_map;
> +     struct ipr_vector_map_info *cpu_map_tbl;
> +     u16 possible_cpu_cnt;
> +
> +     u32 use_blk_mq;
> +
>  }; /* struct ipr_ioa_cfg */
> 
>  struct ipr_cmnd {
> 


-- 
Brian King
Power Linux I/O
IBM Linux Technology Center

