On 03/09/2016 03:45 PM, wenxi...@linux.vnet.ibm.com wrote:
> From: Wen Xiong <wenxi...@linux.vnet.ibm.com>
>
> Enable MSIX vector mapping to CPU affinity. This feature is
> enabled by setting ipr_cpu_map=1 when loading the ipr module.
>
> Signed-off-by: Wen Xiong <wenxi...@linux.vnet.ibm.com>
> ---
>  drivers/scsi/ipr.c | 198 +++++++++++++++++++++++++++++++++++++++++++++++++++-
>  drivers/scsi/ipr.h |  16 ++++
>  2 files changed, 213 insertions(+), 1 deletions(-)
>
> diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
> index bb9ec50..468c690 100644
> --- a/drivers/scsi/ipr.c
> +++ b/drivers/scsi/ipr.c
> @@ -100,6 +100,11 @@ static unsigned int ipr_max_devs = IPR_DEFAULT_SIS64_DEVS;
>  static unsigned int ipr_dual_ioa_raid = 1;
>  static unsigned int ipr_number_of_msix = 16;
>  static unsigned int ipr_fast_reboot;
> +static unsigned int ipr_cpu_map = 1;
> +
> +static unsigned int ipr_used_cpu[NR_CPUS];
> +static unsigned int ipr_possible_cpu_cnt;
> +
>  static DEFINE_SPINLOCK(ipr_driver_lock);
>
>  /* This table describes the differences between DMA controller chips */
> @@ -224,6 +229,8 @@ module_param_named(number_of_msix, ipr_number_of_msix, int, 0);
>  MODULE_PARM_DESC(number_of_msix, "Specify the number of MSIX interrupts to use on capable adapters (1 - 16). (default:16)");
>  module_param_named(fast_reboot, ipr_fast_reboot, int, S_IRUGO | S_IWUSR);
>  MODULE_PARM_DESC(fast_reboot, "Skip adapter shutdown during reboot. Set to 1 to enable. (default: 0)");
> +module_param_named(cpu_map, ipr_cpu_map, int, 0);
> +MODULE_PARM_DESC(cpu_map, "Enable CPU affinity per adapter. (default:1)");
>  MODULE_LICENSE("GPL");
>  MODULE_VERSION(IPR_DRIVER_VERSION);
>
> @@ -1053,6 +1060,17 @@ static void ipr_send_blocking_cmd(struct ipr_cmnd *ipr_cmd,
>  static int ipr_get_hrrq_index(struct ipr_ioa_cfg *ioa_cfg)
>  {
>  	unsigned int hrrq;
> +	struct ipr_vector_map_info *map_per_cpu;
> +	u32 cpu;
> +
> +	if (ioa_cfg->cpu_map && ioa_cfg->hrrq_num > 1) {
> +		cpu = smp_processor_id();
> +		map_per_cpu = ioa_cfg->cpu_map_tbl;
> +		if (cpu < ioa_cfg->possible_cpu_cnt) {
> +			map_per_cpu += cpu;
> +			return map_per_cpu->hrrq_id;
> +		}
> +	}
>
>  	if (ioa_cfg->hrrq_num == 1)
>  		hrrq = 0;
> @@ -4089,6 +4107,68 @@ static struct device_attribute ipr_ioa_fw_type_attr = {
>  	.show = ipr_show_fw_type
>  };
>
> +static ssize_t
> +ipr_show_cpu_map_tbl(struct device *dev, struct device_attribute *attr,
> +	char *buf)
> +{
> +	struct Scsi_Host *shost = class_to_shost(dev);
> +	struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata;
> +	struct ipr_vector_map_info *map_per_cpu;
> +	int len = 0, show_current_cpu = 0;
> +
> +	if (ioa_cfg->intr_flag != IPR_USE_MSIX)
> +		return len;
> +
> +	switch (ioa_cfg->cpu_map) {
> +	case 0:
> +		len += snprintf(buf + len, PAGE_SIZE - len,
> +			"ipr driver cpu map: No mapping (%d)\n",
> +			ioa_cfg->cpu_map);
> +		return len;
> +	case 1:
> +		len += snprintf(buf + len, PAGE_SIZE - len,
> +			"ipr driver cpu map: IRQ vector to CPU mapping (%d): "
> +			"%d online CPUs\n",
> +			ioa_cfg->cpu_map,
> +			num_online_cpus());
> +		break;
> +	}
> +
> +	while (show_current_cpu < ioa_cfg->possible_cpu_cnt) {
> +		map_per_cpu = &ioa_cfg->cpu_map_tbl[show_current_cpu];
> +
> +		if (map_per_cpu->irq_id == IPR_VECTOR_MAP_EMPTY)
> +			len += snprintf(buf + len, PAGE_SIZE - len,
> +				"CPU %02d io_hrrq %02d\n",
> +				show_current_cpu,
> +				map_per_cpu->hrrq_id);
> +		else
> +			len += snprintf(buf + len, PAGE_SIZE - len,
> +				"CPU %02d io_hrrq %02d "
> +				" IRQ=%d\n",
> +				show_current_cpu,
> +				map_per_cpu->hrrq_id,
> +				map_per_cpu->irq_id);
> +
> +		show_current_cpu++;
> +
> +		if (show_current_cpu < ioa_cfg->possible_cpu_cnt
> +			&& (len >= (PAGE_SIZE - 64))) {
> +			len += snprintf(buf + len, PAGE_SIZE - len, "more...\n");
> +			break;
> +		}
> +	}
> +
> +	return len;
> +}
> +
> +static struct device_attribute ipr_cpu_map_attr = {
> +	.attr = {
> +		.name = "cpu_map",
> +		.mode = S_IRUGO | S_IWUSR,

You indicate this attribute can be written to by root, but you don't
implement a store function.
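If writes are intended, a minimal store function could look something
like this (untested sketch, illustrative only -- the name and the 0/1
toggle semantics are my assumption; it only records the policy, since
re-mapping live vectors would need more work). Otherwise, just drop
S_IWUSR from the mode.

	static ssize_t
	ipr_store_cpu_map_tbl(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t count)
	{
		struct Scsi_Host *shost = class_to_shost(dev);
		struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata;
		unsigned int val;

		/* Accept only 0 (no mapping) or 1 (map vectors to CPUs) */
		if (kstrtouint(buf, 0, &val) || val > 1)
			return -EINVAL;

		/* Records the policy only; existing affinity hints are
		 * not redone here and would take effect on a remap. */
		ioa_cfg->cpu_map = val;
		return count;
	}

...and wire it up with .store = ipr_store_cpu_map_tbl.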
> +	},
> +	.show = ipr_show_cpu_map_tbl,
> +};
> +
>  static struct device_attribute *ipr_ioa_attrs[] = {
>  	&ipr_fw_version_attr,
>  	&ipr_log_level_attr,
> @@ -4098,6 +4178,7 @@ static struct device_attribute *ipr_ioa_attrs[] = {
>  	&ipr_update_fw_attr,
>  	&ipr_ioa_fw_type_attr,
>  	&ipr_iopoll_weight_attr,
> +	&ipr_cpu_map_attr,
>  	NULL,
>  };
>
> @@ -9335,6 +9416,7 @@ static void ipr_free_mem(struct ipr_ioa_cfg *ioa_cfg)
>
>  	ipr_free_dump(ioa_cfg);
>  	kfree(ioa_cfg->trace);
> +	kfree(ioa_cfg->cpu_map_tbl);
>  }
>
>  /**
> @@ -9354,9 +9436,13 @@ static void ipr_free_irqs(struct ipr_ioa_cfg *ioa_cfg)
>  	if (ioa_cfg->intr_flag == IPR_USE_MSI ||
>  	    ioa_cfg->intr_flag == IPR_USE_MSIX) {
>  		int i;
> -		for (i = 0; i < ioa_cfg->nvectors; i++)
> +		for (i = 0; i < ioa_cfg->nvectors; i++) {
> +			if (ioa_cfg->cpu_map)
> +				irq_set_affinity_hint(
> +					ioa_cfg->vectors_info[i].vec, NULL);
>  			free_irq(ioa_cfg->vectors_info[i].vec,
>  				&ioa_cfg->hrrq[i]);
> +		}
>  	} else
>  		free_irq(pdev->irq, &ioa_cfg->hrrq[0]);
>
> @@ -9587,11 +9673,25 @@ static int ipr_alloc_mem(struct ipr_ioa_cfg *ioa_cfg)
>  	if (!ioa_cfg->trace)
>  		goto out_free_hostrcb_dma;
>
> +	if (ioa_cfg->cpu_map) {
> +		ioa_cfg->cpu_map_tbl =
> +			kzalloc(sizeof(struct ipr_vector_map_info) *
> +			ioa_cfg->possible_cpu_cnt, GFP_KERNEL);
> +
> +		if (!ioa_cfg->cpu_map_tbl)
> +			goto out_free_trace;
> +	}
> +
> +	for (i = 0; i < ipr_possible_cpu_cnt; i++)
> +		ipr_used_cpu[i] = IPR_VECTOR_MAP_EMPTY;

It seems like this shouldn't be a global. You seem to only use it at
probe time, so I can see why you made it a global, but it just seems
wrong. We should either allocate this temporary buffer dynamically at
probe time and free it afterwards, since it is only needed during
probe, or take a different approach. Another option would be to define
a bitmap in ioa_cfg like:

	unsigned long used_cpu[(NR_CPUS / BITS_PER_LONG) + 1];

and then use bitops to access and set the bits. Then we'd likely just
leave the memory around, since it wouldn't be very large.
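For illustration, a rough sketch of that bitmap variant (untested;
field and variable names are placeholders), using the standard
DECLARE_BITMAP macro and bitops:

	/* In struct ipr_ioa_cfg -- expands to an unsigned long array
	 * sized BITS_TO_LONGS(NR_CPUS), same as the open-coded form: */
	DECLARE_BITMAP(used_cpu, NR_CPUS);

	/* At probe time, start with no CPUs claimed: */
	bitmap_zero(ioa_cfg->used_cpu, NR_CPUS);

	/* When binding a vector to a CPU: */
	set_bit(next_cpu_id, ioa_cfg->used_cpu);

	/* When looking for a free CPU: */
	if (!test_bit(i, ioa_cfg->used_cpu))
		/* CPU i has no vector bound to it yet */

That would also get rid of the IPR_VECTOR_MAP_EMPTY sentinel for this
particular check.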
> +
>  	rc = 0;
>  out:
>  	LEAVE;
>  	return rc;
>
> +out_free_trace:
> +	kfree(ioa_cfg->trace);
>  out_free_hostrcb_dma:
>  	while (i-- > 0) {
>  		dma_free_coherent(&pdev->dev, sizeof(struct ipr_hostrcb),
> @@ -9793,6 +9893,92 @@ static void ipr_wait_for_pci_err_recovery(struct ipr_ioa_cfg *ioa_cfg)
>  	}
>  }
>
> +static int
> +ipr_find_next_free_cpu(struct ipr_ioa_cfg *ioa_cfg, u32 cpu_id)
> +{
> +	struct ipr_vector_map_info *map_per_cpu;
> +	int i;
> +
> +	map_per_cpu = ioa_cfg->cpu_map_tbl;
> +	for (i = 0; i < ioa_cfg->possible_cpu_cnt; i++) {
> +		if (cpu_online(i)) {
> +			if ((map_per_cpu->irq_id == IPR_VECTOR_MAP_EMPTY) &&
> +			    (ipr_used_cpu[i] == IPR_VECTOR_MAP_EMPTY) &&
> +			    (map_per_cpu->cpu_id == cpu_id))
> +				return i;
> +		}
> +		map_per_cpu++;
> +	}
> +
> +	return IPR_VECTOR_MAP_EMPTY;
> +}
> +
> +static int ipr_set_cpu_affinity_for_hrrq(struct ipr_ioa_cfg *ioa_cfg,
> +	int vectors)
> +{
> +	struct ipr_vector_map_info *map_per_cpu;
> +	int cpu_idx = 0, hrrq_num, next_cpu_id, i;
> +
> +	memset(ioa_cfg->cpu_map_tbl, 0xff,
> +		(sizeof(struct ipr_vector_map_info) *
> +		ioa_cfg->possible_cpu_cnt));
> +
> +	map_per_cpu = ioa_cfg->cpu_map_tbl;
> +	for (i = 0; i < ioa_cfg->possible_cpu_cnt; i++, map_per_cpu++)
> +		map_per_cpu->cpu_id = i;
> +
> +	for (i = 0; i < ipr_possible_cpu_cnt; i++) {
> +		if (ipr_used_cpu[i] == IPR_VECTOR_MAP_EMPTY) {
> +			cpu_idx = i;
> +			break;
> +		}
> +	}
> +
> +	hrrq_num = 0;
> +	for (i = 0; i < vectors; i++) {
> +		map_per_cpu = ioa_cfg->cpu_map_tbl;
> +		next_cpu_id = ipr_find_next_free_cpu(ioa_cfg, cpu_idx);
> +
> +		if (next_cpu_id == IPR_VECTOR_MAP_EMPTY) {
> +			int j;
> +
> +			for (j = 0; j < i; j++)
> +				irq_set_affinity_hint(
> +					ioa_cfg->vectors_info[j].vec, NULL);
> +			ioa_cfg->cpu_map = 0;
> +			dev_info(&ioa_cfg->pdev->dev, "Cannot set cpu affinity\n");
> +			return 0;
> +		}
> +
> +		map_per_cpu += next_cpu_id;
> +		ipr_used_cpu[next_cpu_id] = next_cpu_id;
> +
> +		map_per_cpu->irq_id = ioa_cfg->vectors_info[i].vec;
> +		map_per_cpu->hrrq_id = hrrq_num;
> +		irq_set_affinity_hint(ioa_cfg->vectors_info[i].vec,
> +			get_cpu_mask(next_cpu_id));
> +		cpu_idx++;
> +		hrrq_num++;
> +	}
> +
> +	hrrq_num = 0;
> +	if (vectors < ioa_cfg->possible_cpu_cnt) {

Seems like there is room for improvement here when the number of CPUs
is much larger than the number of HRRQs; perhaps we do this in a
follow-on patch. For a larger CPU config, we'd want to start taking
NUMA affinity and CPU topology into account and set the irq affinity
hint on multiple threads of the same core, in order to spread things
as evenly as we can.
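For illustration, something in this direction (untested sketch; it
ignores the cross-adapter bookkeeping above and is only meant to show
the idea of a NUMA-aware spread using cpumask_local_spread(), which
has been available since v4.1):

	int node = dev_to_node(&ioa_cfg->pdev->dev);

	for (i = 0; i < vectors; i++) {
		/* Pick the i-th CPU, preferring CPUs on the adapter's
		 * NUMA node before falling back to remote ones. */
		unsigned int cpu = cpumask_local_spread(i, node);

		irq_set_affinity_hint(ioa_cfg->vectors_info[i].vec,
				      get_cpu_mask(cpu));
	}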
> +		map_per_cpu = ioa_cfg->cpu_map_tbl;
> +		for (i = 0; i < ioa_cfg->possible_cpu_cnt; i++) {
> +			if (map_per_cpu->hrrq_id == IPR_VECTOR_MAP_EMPTY) {
> +				map_per_cpu->hrrq_id = hrrq_num;
> +				hrrq_num++;
> +			}
> +
> +			if (hrrq_num == ioa_cfg->hrrq_num)
> +				hrrq_num = 0;
> +			map_per_cpu++;
> +		}
> +	}
> +
> +	return 1;
> +}
> +
>  static int ipr_enable_msix(struct ipr_ioa_cfg *ioa_cfg)
>  {
>  	struct msix_entry entries[IPR_MAX_MSIX_VECTORS];
> @@ -10047,6 +10233,7 @@ static int ipr_probe_ioa(struct pci_dev *pdev,
>  	ioa_cfg->hdw_dma_regs = ipr_regs;
>  	ioa_cfg->hdw_dma_regs_pci = ipr_regs_pci;
>  	ioa_cfg->ioa_mailbox = ioa_cfg->chip_cfg->mailbox + ipr_regs;
> +	ioa_cfg->cpu_map = ipr_cpu_map;
>
>  	ipr_init_regs(ioa_cfg);
>
> @@ -10108,6 +10295,8 @@ static int ipr_probe_ioa(struct pci_dev *pdev,
>  		}
>  	}
>
> +	ioa_cfg->possible_cpu_cnt = ipr_possible_cpu_cnt;
> +
>  	if (ioa_cfg->intr_flag == IPR_USE_MSI ||
>  	    ioa_cfg->intr_flag == IPR_USE_MSIX) {
>  		rc = ipr_test_msi(ioa_cfg, pdev);
> @@ -10202,6 +10391,9 @@ static int ipr_probe_ioa(struct pci_dev *pdev,
>  		goto cleanup_nolog;
>  	}
>
> +	if (ioa_cfg->cpu_map)
> +		ipr_set_cpu_affinity_for_hrrq(ioa_cfg, ioa_cfg->nvectors);
> +
>  	if ((dev_id->driver_data & IPR_USE_PCI_WARM_RESET) ||
>  	    (dev_id->device == PCI_DEVICE_ID_IBM_OBSIDIAN_E && !ioa_cfg->revid)) {
>  		ioa_cfg->needs_warm_reset = 1;
> @@ -10645,9 +10837,13 @@ static struct notifier_block ipr_notifier = {
>  **/
>  static int __init ipr_init(void)
>  {
> +	int cpu;
>  	ipr_info("IBM Power RAID SCSI Device Driver version: %s %s\n",
>  		 IPR_DRIVER_VERSION, IPR_DRIVER_DATE);
>
> +	for_each_present_cpu(cpu)
> +		ipr_possible_cpu_cnt++;

This could be simplified to:

	ipr_possible_cpu_cnt = num_present_cpus();

Also, I don't think this works quite right with CPU hotplug. According
to include/linux/cpumask.h, cpu_present_mask varies dynamically
depending on what is reported as currently plugged in. I think the
following might handle CPU hotplug better:

	ipr_possible_cpu_cnt = num_possible_cpus();

I'd suggest renaming ipr_possible_cpu_cnt to something like
ipr_irq_vector_map_entries to make it clearer what it is intended to
do rather than where the value came from.

Reading the patch further, I'm thinking we should just ditch this
global altogether. It's not needed, as far as I can see. You can
simply initialize ioa_cfg->possible_cpu_cnt to num_possible_cpus()
early at probe time and then use that value everywhere. I'd still
recommend renaming ioa_cfg->possible_cpu_cnt, though, to something
like ioa_cfg->num_irq_vector_map_entries.

> +
>  	register_reboot_notifier(&ipr_notifier);
>  	return pci_register_driver(&ipr_driver);
>  }
> diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h
> index 56c5706..8f8ab6b 100644
> --- a/drivers/scsi/ipr.h
> +++ b/drivers/scsi/ipr.h
> @@ -1464,6 +1464,16 @@ enum ipr_sdt_state {
>  	DUMP_OBTAINED
>  };
>
> +#define IPR_VECTOR_MAP_EMPTY	0xffff
> +#define IPR_SET_IO_CPU_AFFINITY	0x1
> +
> +/* IRQ vector to CPU mapping when set CPU affinity*/
> +struct ipr_vector_map_info {
> +	u16 cpu_id;

cpu_id should probably be an unsigned int, to be consistent with the
rest of the kernel.

> +	u16 irq_id;
> +	u16 hrrq_id;
> +};
> +
>  /* Per-controller data */
>  struct ipr_ioa_cfg {
>  	char eye_catcher[8];
> @@ -1595,6 +1605,12 @@ struct ipr_ioa_cfg {
>
>  	u32 iopoll_weight;
>
> +	u32 cpu_map;
> +	struct ipr_vector_map_info *cpu_map_tbl;
> +	u16 possible_cpu_cnt;

And if we change cpu_id above, then this should become an unsigned int
as well.
> +
> +	u32 use_blk_mq;
> +
>  }; /* struct ipr_ioa_cfg */
>
>  struct ipr_cmnd {

-- 
Brian King
Power Linux I/O
IBM Linux Technology Center