Hi Brian,

I configured 3 processors in the HMC for this LPAR. Yes, the code works per
SMT thread, so in total there are 3 x 8 = 24 threads. I changed

cpu = smp_processor_id() to cpu = raw_smp_processor_id() in
ipr_get_hrrq_index().

I didn't change any other code.
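
For reference, the hunk I tested now looks roughly like this (a sketch based
on the patch below; only the processor-id call differs, and the rest of the
function is unchanged):

static int ipr_get_hrrq_index(struct ipr_ioa_cfg *ioa_cfg)
{
        unsigned int hrrq;
        struct ipr_vector_map_info *map_per_cpu;
        u32 cpu;

        if (ioa_cfg->set_cpu_affinity == IPR_SET_IO_CPU_AFFINITY
                        && ioa_cfg->cpu_map && ioa_cfg->hrrq_num > 1) {
                /* raw_ variant skips the preemptible-context check; the CPU
                 * id is only used as a hint here, so a stale value is
                 * harmless */
                cpu = raw_smp_processor_id();
                map_per_cpu = ioa_cfg->cpu_map_tbl;
                if (cpu < ioa_cfg->possible_cpu_cnt) {
                        map_per_cpu += cpu;
                        return map_per_cpu->hrrq_id;
                }
        }
        /* remainder of ipr_get_hrrq_index() is unchanged */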

[root@everest-lp2 ~]# lscpu
Architecture:          ppc64le
Byte Order:            Little Endian
CPU(s):                24
On-line CPU(s) list:   0-23
Thread(s) per core:    8
Core(s) per socket:    1
Socket(s):             3
NUMA node(s):          4
Model:                 IBM,8286-42A
L1d cache:             64K
L1i cache:             32K
L2 cache:              512K
L3 cache:              8192K
NUMA node0 CPU(s):
NUMA node1 CPU(s):     0-15
NUMA node2 CPU(s):
NUMA node3 CPU(s):     16-23
[root@everest-lp2 ~]#

The first adapter maps IRQ vectors to CPUs; the second adapter didn't set up
a mapping. (For the first adapter, CPUs 16-23 have no dedicated vector, so
they are wrapped back onto io_hrrq 0-7.)
[root@everest-lp2 scsi_host]# cat host3/cpu_map
ipr driver cpu map: IRQ vector to CPU mapping (1): 24 online CPUs
CPU 00 io_hrrq 00 IRQ=224
CPU 01 io_hrrq 01 IRQ=225
CPU 02 io_hrrq 02 IRQ=226
CPU 03 io_hrrq 03 IRQ=227
CPU 04 io_hrrq 04 IRQ=228
CPU 05 io_hrrq 05 IRQ=229
CPU 06 io_hrrq 06 IRQ=230
CPU 07 io_hrrq 07 IRQ=231
CPU 08 io_hrrq 08 IRQ=232
CPU 09 io_hrrq 09 IRQ=233
CPU 10 io_hrrq 10 IRQ=234
CPU 11 io_hrrq 11 IRQ=235
CPU 12 io_hrrq 12 IRQ=236
CPU 13 io_hrrq 13 IRQ=237
CPU 14 io_hrrq 14 IRQ=238
CPU 15 io_hrrq 15 IRQ=239
CPU 16 io_hrrq 00
CPU 17 io_hrrq 01
CPU 18 io_hrrq 02
CPU 19 io_hrrq 03
CPU 20 io_hrrq 04
CPU 21 io_hrrq 05
CPU 22 io_hrrq 06
CPU 23 io_hrrq 07
[root@everest-lp2 scsi_host]# cat host4/cpu_map
ipr driver cpu map: No mapping (0)
[root@everest-lp2 scsi_host]#

Attached is /proc/interrupts after some fio runs. You can see how the
interrupts spread across the CPUs.

(See attached file: interrupts)

We can discuss more tomorrow.
Thanks,
Wendy



From:   Brian King <brk...@linux.vnet.ibm.com>
To:     wenxi...@linux.vnet.ibm.com, iprdd-devel@lists.sourceforge.net
Date:   11/17/2015 05:15 PM
Subject:        Re: [Iprdd-devel] [PATCH 1/3] Enable set cpu affinity in ipr driver



On 11/17/2015 11:20 AM, wenxi...@linux.vnet.ibm.com wrote:
> From: Wen Xiong <wenxi...@linux.vnet.ibm.com>
>
> ---
>  drivers/scsi/ipr.c |  223 +++++++++++++++++++++++++++++++++++++++++++++++++++-
>  drivers/scsi/ipr.h |   17 ++++
>  2 files changed, 239 insertions(+), 1 deletions(-)
>
> diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
> index 536cd5a..ba92b16 100644
> --- a/drivers/scsi/ipr.c
> +++ b/drivers/scsi/ipr.c
> @@ -100,6 +100,11 @@ static unsigned int ipr_max_devs = IPR_DEFAULT_SIS64_DEVS;
>  static unsigned int ipr_dual_ioa_raid = 1;
>  static unsigned int ipr_number_of_msix = 2;
>  static unsigned int ipr_fast_reboot;
> +static unsigned int ipr_cpu_map = 0;

No need to init to 0 since it's a static variable.

> +
> +static unsigned int *ipr_used_cpu;
> +static unsigned int ipr_possible_cpu_cnt;
> +
>  static DEFINE_SPINLOCK(ipr_driver_lock);
>
>  /* This table describes the differences between DMA controller chips */
> @@ -224,6 +229,8 @@ module_param_named(number_of_msix, ipr_number_of_msix, int, 0);
>  MODULE_PARM_DESC(number_of_msix, "Specify the number of MSIX interrupts to use on capable adapters (1 - 16).  (default:2)");
>  module_param_named(fast_reboot, ipr_fast_reboot, int, S_IRUGO | S_IWUSR);
>  MODULE_PARM_DESC(fast_reboot, "Skip adapter shutdown during reboot. Set to 1 to enable. (default: 0)");
> +module_param_named(cpu_map, ipr_cpu_map,  int, 0);
> +MODULE_PARM_DESC(cpu_map, "Defines how to map CPUs to IRQ vectors per adapter");

Where are we going to document the syntax for this parameter?
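For example, something along these lines in the parameter description itself
might be enough (just a sketch; the 0/1 values are taken from the show
routine below, and 0 is the default since the variable is static):

MODULE_PARM_DESC(cpu_map, "Defines how to map CPUs to IRQ vectors per adapter. "
                "Set to 1 to map each IRQ vector to a CPU. (default: 0, no mapping)");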

>  MODULE_LICENSE("GPL");
>  MODULE_VERSION(IPR_DRIVER_VERSION);
>
> @@ -1053,6 +1060,18 @@ static void ipr_send_blocking_cmd(struct ipr_cmnd *ipr_cmd,
>  static int ipr_get_hrrq_index(struct ipr_ioa_cfg *ioa_cfg)
>  {
>                unsigned int hrrq;
> +              struct ipr_vector_map_info *map_per_cpu;
> +              u32 cpu;
> +
> +              if (ioa_cfg->set_cpu_affinity == IPR_SET_IO_CPU_AFFINITY
> +                              && ioa_cfg->cpu_map && ioa_cfg->hrrq_num > 1) {
> +                              cpu = smp_processor_id();
> +                              map_per_cpu = ioa_cfg->cpu_map_tbl;
> +                              if (cpu < ioa_cfg->possible_cpu_cnt) {
> +                                              map_per_cpu += cpu;
> +                                              return map_per_cpu->hrrq_id;
> +                              }
> +              }

I'm thinking we should be taking into account the NUMA topology here.
Seems like we'd want the affinity for an IRQ to be bound to a core and not
a CPU.
That way it could fire on any available SMT thread.
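Roughly what I'm picturing, as a sketch against the vector loop in
ipr_set_cpu_affinity_for_hrrq() below (this assumes topology_sibling_cpumask(),
or an equivalent SMT-sibling mask helper, is usable here):

        /* Sketch only: hint the vector at the whole core's SMT sibling mask
         * instead of a single thread, so the interrupt can fire on whichever
         * thread of that core is available. */
        const struct cpumask *core_mask = topology_sibling_cpumask(next_cpu_id);

        irq_set_affinity_hint(ioa_cfg->vectors_info[i].vec, core_mask);
        /* then step cpu_idx by cpumask_weight(core_mask) so the next vector
         * lands on a different core */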

It seems like what the code here does now is done per SMT thread, so on a P8,
which has 8 threads per core, don't we end up running everything on the first
2 cores? Or am I not looking close enough at the code?

Thanks,

Brian



>
>                if (ioa_cfg->hrrq_num == 1)
>                                hrrq = 0;
> @@ -4086,6 +4105,77 @@ static struct device_attribute ipr_ioa_fw_type_attr = {
>                .show = ipr_show_fw_type
>  };
>
> +static ssize_t
> +ipr_show_cpu_map_tbl(struct device *dev, struct device_attribute *attr,
> +                char *buf)
> +{
> +        struct Scsi_Host *shost = class_to_shost(dev);
> +        struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata;
> +        struct ipr_vector_map_info *map_per_cpu;
> +        int  len = 0, show_current_cpu = 0;
> +
> +        if (ioa_cfg->intr_flag != IPR_USE_MSIX)
> +                return len;
> +
> +        switch (ioa_cfg->cpu_map) {
> +        case 0:
> +                len += snprintf(buf + len, PAGE_SIZE-len,
> +                                "ipr driver cpu map: No mapping (%d)\n",
> +                                ioa_cfg->cpu_map);
> +                return len;
> +        case 1:
> +                len += snprintf(buf + len, PAGE_SIZE-len,
> +                                "ipr driver cpu map: IRQ vector to CPU mapping (%d): "
> +                                "%d online CPUs\n",
> +                                ioa_cfg->cpu_map,
> +                                num_online_cpus());
> +                break;
> +        }
> +
> +        while (show_current_cpu < ioa_cfg->possible_cpu_cnt) {
> +                map_per_cpu = &ioa_cfg->cpu_map_tbl[show_current_cpu];
> +
> +                if (map_per_cpu->irq_id == IPR_VECTOR_MAP_EMPTY)
> +                        len += snprintf(buf + len, PAGE_SIZE-len,
> +                                        "CPU %02d io_hrrq %02d\n ",
> +                                        map_per_cpu->cpu_id,
> +                                        map_per_cpu->hrrq_id);
> +                else
> +                        len += snprintf(buf + len, PAGE_SIZE-len,
> +                                        "CPU %02d io_hrrq %02d "
> +                                        "IRQ=%d\n", map_per_cpu->cpu_id,
> +                                        map_per_cpu->hrrq_id,
> +                                        map_per_cpu->irq_id);
> +
> +                show_current_cpu++;
> +
> +                if (show_current_cpu < ioa_cfg->possible_cpu_cnt
> +                                && (len >= (PAGE_SIZE - 64))) {
> +                        len += snprintf(buf + len, PAGE_SIZE-len, "more...\n");
> +                        break;
> +                }
> +        }
> +
> +        return len;
> +}
> +
> +static ssize_t
> +ipr_store_cpu_map_tbl(struct device *dev, struct device_attribute *attr,
> +                                              const char *buf, size_t count)
> +{
> +              int status = -EINVAL;
> +              return status;
> +}
> +
> +static struct device_attribute ipr_cpu_map_attr = {
> +        .attr = {
> +                .name =         "cpu_map",
> +                .mode =         S_IRUGO | S_IWUSR,
> +        },
> +        .show = ipr_show_cpu_map_tbl,
> +        .store = ipr_store_cpu_map_tbl
> +};
> +
>  static struct device_attribute *ipr_ioa_attrs[] = {
>                &ipr_fw_version_attr,
>                &ipr_log_level_attr,
> @@ -4095,6 +4185,7 @@ static struct device_attribute *ipr_ioa_attrs[] = {
>                &ipr_update_fw_attr,
>                &ipr_ioa_fw_type_attr,
>                &ipr_iopoll_weight_attr,
> +              &ipr_cpu_map_attr,
>                NULL,
>  };
>
> @@ -9333,6 +9424,8 @@ static void ipr_free_mem(struct ipr_ioa_cfg *ioa_cfg)
>
>                ipr_free_dump(ioa_cfg);
>                kfree(ioa_cfg->trace);
> +              if (ioa_cfg->cpu_map)
> +                              kfree(ioa_cfg->cpu_map_tbl);
>  }
>
>  /**
> @@ -9352,9 +9445,13 @@ static void ipr_free_irqs(struct ipr_ioa_cfg *ioa_cfg)
>          if (ioa_cfg->intr_flag == IPR_USE_MSI ||
>              ioa_cfg->intr_flag == IPR_USE_MSIX) {
>                  int i;
> -                for (i = 0; i < ioa_cfg->nvectors; i++)
> +                for (i = 0; i < ioa_cfg->nvectors; i++) {
> +                        if (ioa_cfg->cpu_map)
> +                                irq_set_affinity_hint(
> +                                        ioa_cfg->vectors_info[i].vec, NULL);
>                          free_irq(ioa_cfg->vectors_info[i].vec,
>                                   &ioa_cfg->hrrq[i]);
> +                }
>          } else
>                  free_irq(pdev->irq, &ioa_cfg->hrrq[0]);
>
> @@ -9585,11 +9682,35 @@ static int ipr_alloc_mem(struct ipr_ioa_cfg *ioa_cfg)
>                if (!ioa_cfg->trace)
>                                goto out_free_hostrcb_dma;
>
> +        if (ioa_cfg->cpu_map) {
> +                ioa_cfg->cpu_map_tbl =
> +                        kzalloc(sizeof(struct ipr_vector_map_info) *
> +                        ioa_cfg->possible_cpu_cnt, GFP_KERNEL);
> +
> +                if (!ioa_cfg->cpu_map_tbl)
> +                        goto out_free_trace;
> +        }
> +
> +        if (ipr_used_cpu == NULL) {
> +                ipr_used_cpu = kzalloc((sizeof(uint16_t) *
> +                                ipr_possible_cpu_cnt), GFP_KERNEL);
> +                if (!ipr_used_cpu)
> +                        goto out_free_cpu_map_tbl;
> +
> +                for (i = 0; i < ipr_possible_cpu_cnt; i++)
> +                        ipr_used_cpu[i] = IPR_VECTOR_MAP_EMPTY;
> +        }
> +
>                rc = 0;
>  out:
>                LEAVE;
>                return rc;
>
> +out_free_cpu_map_tbl:
> +              if (ioa_cfg->cpu_map)
> +                              kfree(ioa_cfg->cpu_map_tbl);
> +out_free_trace:
> +              kfree(ioa_cfg->trace);
>  out_free_hostrcb_dma:
>                while (i-- > 0) {
>                                dma_free_coherent(&pdev->dev, sizeof(struct ipr_hostrcb),
> @@ -9791,6 +9912,94 @@ static void ipr_wait_for_pci_err_recovery(struct ipr_ioa_cfg *ioa_cfg)
>                }
>  }
>
> +static int
> +ipr_find_next_free_cpu(struct ipr_ioa_cfg *ioa_cfg, uint32_t cpu_id)
> +{
> +        struct ipr_vector_map_info *map_per_cpu;
> +        int i;
> +
> +        map_per_cpu = ioa_cfg->cpu_map_tbl;
> +        for (i = 0; i < ioa_cfg->possible_cpu_cnt; i++) {
> +                if (cpu_online(i)) {
> +                        if ((map_per_cpu->irq_id == IPR_VECTOR_MAP_EMPTY) &&
> +                                (ipr_used_cpu[i] == IPR_VECTOR_MAP_EMPTY) &&
> +                                (map_per_cpu->cpu_id == cpu_id))
> +                                        return i;
> +                }
> +                map_per_cpu++;
> +        }
> +
> +        return IPR_VECTOR_MAP_EMPTY;
> +}
> +
> +static int ipr_set_cpu_affinity_for_hrrq(struct ipr_ioa_cfg *ioa_cfg,
> +                                         int vectors)
> +{
> +        struct ipr_vector_map_info *map_per_cpu;
> +        u32 cpu_idx = 0, hrrq_num = 0, next_cpu_id, i;
> +
> +        if (!ioa_cfg->cpu_map)
> +                return 1;
> +
> +        memset(ioa_cfg->cpu_map_tbl, 0xff,
> +                        (sizeof(struct ipr_vector_map_info) *
> +                        ioa_cfg->possible_cpu_cnt));
> +
> +        map_per_cpu = ioa_cfg->cpu_map_tbl;
> +        for (i = 0; i < ioa_cfg->possible_cpu_cnt; i++, map_per_cpu++)
> +                map_per_cpu->cpu_id = i;
> +
> +        for (i = 0; i < ipr_possible_cpu_cnt; i++) {
> +                if (ipr_used_cpu[i] == IPR_VECTOR_MAP_EMPTY) {
> +                        cpu_idx = i;
> +                        break;
> +                }
> +        }
> +
> +        hrrq_num = 0;
> +        for (i = 0; i < vectors; i++) {
> +                map_per_cpu = ioa_cfg->cpu_map_tbl;
> +                next_cpu_id = ipr_find_next_free_cpu(ioa_cfg, cpu_idx);
> +                if (next_cpu_id == IPR_VECTOR_MAP_EMPTY) {
> +                        int j;
> +
> +                        for (j = 0; j < i; j++)
> +                                irq_set_affinity_hint(
> +                                        ioa_cfg->vectors_info[j].vec, NULL);
> +                        ioa_cfg->cpu_map = 0;
> +                        dev_info(&ioa_cfg->pdev->dev, "Cannot set cpu affinity\n");
> +                        return 0;
> +                }
> +                map_per_cpu += next_cpu_id;
> +                ipr_used_cpu[next_cpu_id] = next_cpu_id;
> +
> +                map_per_cpu->irq_id = ioa_cfg->vectors_info[i].vec;
> +                map_per_cpu->hrrq_id = hrrq_num;
> +                irq_set_affinity_hint(ioa_cfg->vectors_info[i].vec,
> +                                get_cpu_mask(next_cpu_id));
> +                cpu_idx++;
> +                hrrq_num++;
> +        }
> +
> +        hrrq_num = 0;
> +        if (vectors < ioa_cfg->possible_cpu_cnt) {
> +                map_per_cpu = ioa_cfg->cpu_map_tbl;
> +                for (i = 0; i < ioa_cfg->possible_cpu_cnt; i++) {
> +                        if (map_per_cpu->hrrq_id == IPR_VECTOR_MAP_EMPTY) {
> +                                map_per_cpu->hrrq_id = hrrq_num;
> +                                hrrq_num++;
> +                        }
> +                        if (hrrq_num == ioa_cfg->hrrq_num)
> +                                hrrq_num = 0;
> +                        map_per_cpu++;
> +                }
> +        }
> +
> +        ioa_cfg->set_cpu_affinity = IPR_SET_IO_CPU_AFFINITY;
> +
> +        return 1;
> +}
> +
>  static int ipr_enable_msix(struct ipr_ioa_cfg *ioa_cfg)
>  {
>                struct msix_entry entries[IPR_MAX_MSIX_VECTORS];
> @@ -10045,6 +10254,7 @@ static int ipr_probe_ioa(struct pci_dev *pdev,
>                ioa_cfg->hdw_dma_regs = ipr_regs;
>                ioa_cfg->hdw_dma_regs_pci = ipr_regs_pci;
>                ioa_cfg->ioa_mailbox = ioa_cfg->chip_cfg->mailbox + ipr_regs;
> +              ioa_cfg->cpu_map = ipr_cpu_map;
>
>                ipr_init_regs(ioa_cfg);
>
> @@ -10106,6 +10316,8 @@ static int ipr_probe_ioa(struct pci_dev *pdev,
>                                }
>                }
>
> +              ioa_cfg->possible_cpu_cnt = ipr_possible_cpu_cnt;
> +
>                if (ioa_cfg->intr_flag == IPR_USE_MSI ||
>                    ioa_cfg->intr_flag == IPR_USE_MSIX) {
>                                rc = ipr_test_msi(ioa_cfg, pdev);
> @@ -10200,6 +10412,9 @@ static int ipr_probe_ioa(struct pci_dev *pdev,
>                                goto cleanup_nolog;
>                }
>
> +        if (ioa_cfg->cpu_map)
> +                ipr_set_cpu_affinity_for_hrrq(ioa_cfg, ioa_cfg->nvectors);
> +
>          if ((dev_id->driver_data & IPR_USE_PCI_WARM_RESET) ||
>              (dev_id->device == PCI_DEVICE_ID_IBM_OBSIDIAN_E && !ioa_cfg->revid)) {
>                  ioa_cfg->needs_warm_reset = 1;
> @@ -10644,9 +10859,14 @@ static struct notifier_block ipr_notifier = {
>   **/
>  static int __init ipr_init(void)
>  {
> +              int cpu;
>                ipr_info("IBM Power RAID SCSI Device Driver version: %s %s\n",
>                                 IPR_DRIVER_VERSION, IPR_DRIVER_DATE);
>
> +              ipr_used_cpu = NULL;
> +              for_each_present_cpu(cpu)
> +                              ipr_possible_cpu_cnt++;
> +
>                register_reboot_notifier(&ipr_notifier);
>                return pci_register_driver(&ipr_driver);
>  }
> @@ -10663,6 +10883,7 @@ static void __exit ipr_exit(void)
>  {
>                unregister_reboot_notifier(&ipr_notifier);
>                pci_unregister_driver(&ipr_driver);
> +              kfree(ipr_used_cpu);
>  }
>
>  module_init(ipr_init);
> diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h
> index a34c7a5..c28a673 100644
> --- a/drivers/scsi/ipr.h
> +++ b/drivers/scsi/ipr.h
> @@ -1464,6 +1464,16 @@ enum ipr_sdt_state {
>                DUMP_OBTAINED
>  };
>
> +#define IPR_VECTOR_MAP_EMPTY                          0xffff
> +#define IPR_SET_IO_CPU_AFFINITY                               0x1
> +
> +/* IRQ vector to CPU mapping when set CPU affinity*/
> +struct ipr_vector_map_info {
> +              u16 cpu_id;
> +              u16 irq_id;
> +              u16 hrrq_id;
> +};
> +
>  /* Per-controller data */
>  struct ipr_ioa_cfg {
>                char eye_catcher[8];
> @@ -1595,6 +1605,13 @@ struct ipr_ioa_cfg {
>
>                u32 iopoll_weight;
>
> +              u32 cpu_map;
> +              struct ipr_vector_map_info *cpu_map_tbl;
> +              u16 possible_cpu_cnt;
> +              u16 set_cpu_affinity;
> +
> +              u32 use_blk_mq;
> +
>  }; /* struct ipr_ioa_cfg */
>
>  struct ipr_cmnd {
>


--
Brian King
Power Linux I/O
IBM Linux Technology Center




Attachment: interrupts
Description: Binary data
