Hi Brian,

I configured 3 processors in the HMC for this LPAR. Yes, the mapping is done per SMT thread, for a total of 3 x 8 = 24 threads. I changed cpu = smp_processor_id() to cpu = raw_smp_processor_id() in ipr_get_hrrq_index() and didn't change any other code.
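For reference, here is roughly what the changed lookup looks like. This is only a sketch: the round-robin fallback at the bottom is abridged from the existing driver code and is not part of this patch. raw_smp_processor_id() skips the preemption-safety check in smp_processor_id(); a stale CPU number here only steers the I/O to a neighboring HRRQ, which is harmless:

    static int ipr_get_hrrq_index(struct ipr_ioa_cfg *ioa_cfg)
    {
            unsigned int hrrq;
            u32 cpu;

            if (ioa_cfg->set_cpu_affinity == IPR_SET_IO_CPU_AFFINITY &&
                ioa_cfg->cpu_map && ioa_cfg->hrrq_num > 1) {
                    /* The caller may be preemptible, so use the raw accessor */
                    cpu = raw_smp_processor_id();
                    if (cpu < ioa_cfg->possible_cpu_cnt)
                            return ioa_cfg->cpu_map_tbl[cpu].hrrq_id;
            }

            /* Fall back to the driver's existing round-robin selection */
            if (ioa_cfg->hrrq_num == 1)
                    hrrq = 0;
            else {
                    hrrq = atomic_add_return(1, &ioa_cfg->hrrq_index);
                    hrrq = (hrrq % (ioa_cfg->hrrq_num - 1)) + 1;
            }
            return hrrq;
    }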
Here is the topology as lscpu reports it:

[root@everest-lp2 ~]# lscpu
Architecture:          ppc64le
Byte Order:            Little Endian
CPU(s):                24
On-line CPU(s) list:   0-23
Thread(s) per core:    8
Core(s) per socket:    1
Socket(s):             3
NUMA node(s):          4
Model:                 IBM,8286-42A
L1d cache:             64K
L1i cache:             32K
L2 cache:              512K
L3 cache:              8192K
NUMA node0 CPU(s):
NUMA node1 CPU(s):     0-15
NUMA node2 CPU(s):
NUMA node3 CPU(s):     16-23
[root@everest-lp2 ~]#

The first adapter mapped its IRQ vectors to CPUs; the second adapter did not set up a mapping:

[root@everest-lp2 scsi_host]# cat host3/cpu_map
ipr driver cpu map: IRQ vector to CPU mapping (1): 24 online CPUs
CPU 00 io_hrrq 00 IRQ=224
CPU 01 io_hrrq 01 IRQ=225
CPU 02 io_hrrq 02 IRQ=226
CPU 03 io_hrrq 03 IRQ=227
CPU 04 io_hrrq 04 IRQ=228
CPU 05 io_hrrq 05 IRQ=229
CPU 06 io_hrrq 06 IRQ=230
CPU 07 io_hrrq 07 IRQ=231
CPU 08 io_hrrq 08 IRQ=232
CPU 09 io_hrrq 09 IRQ=233
CPU 10 io_hrrq 10 IRQ=234
CPU 11 io_hrrq 11 IRQ=235
CPU 12 io_hrrq 12 IRQ=236
CPU 13 io_hrrq 13 IRQ=237
CPU 14 io_hrrq 14 IRQ=238
CPU 15 io_hrrq 15 IRQ=239
CPU 16 io_hrrq 00
CPU 17 io_hrrq 01
CPU 18 io_hrrq 02
CPU 19 io_hrrq 03
CPU 20 io_hrrq 04
CPU 21 io_hrrq 05
CPU 22 io_hrrq 06
CPU 23 io_hrrq 07
[root@everest-lp2 scsi_host]# cat host4/cpu_map
ipr driver cpu map: No mapping (0)
[root@everest-lp2 scsi_host]#

The attachment is /proc/interrupts after some fio runs; you can see how the interrupts spread across the CPUs. (See attached file: interrupts)

We can discuss more tomorrow.

Thanks,
Wendy

From: Brian King <brk...@linux.vnet.ibm.com>
To: wenxi...@linux.vnet.ibm.com, iprdd-devel@lists.sourceforge.net
Date: 11/17/2015 05:15 PM
Subject: Re: [Iprdd-devel] [PATCH 1/3] Enable set cpu affinity in ipr driver

On 11/17/2015 11:20 AM, wenxi...@linux.vnet.ibm.com wrote:
> From: Wen Xiong <wenxi...@linux.vnet.ibm.com>
>
> ---
>  drivers/scsi/ipr.c | 223 +++++++++++++++++++++++++++++++++++++++++++++++++++-
>  drivers/scsi/ipr.h |  17 ++++
>  2 files changed, 239 insertions(+), 1 deletions(-)
>
> diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
> index 536cd5a..ba92b16 100644
> --- a/drivers/scsi/ipr.c
> +++ b/drivers/scsi/ipr.c
> @@ -100,6 +100,11 @@ static unsigned int ipr_max_devs = IPR_DEFAULT_SIS64_DEVS;
>  static unsigned int ipr_dual_ioa_raid = 1;
>  static unsigned int ipr_number_of_msix = 2;
>  static unsigned int ipr_fast_reboot;
> +static unsigned int ipr_cpu_map = 0;

No need to init to 0 since it's a static variable.

> +
> +static unsigned int *ipr_used_cpu;
> +static unsigned int ipr_possible_cpu_cnt;
> +
>  static DEFINE_SPINLOCK(ipr_driver_lock);
>
>  /* This table describes the differences between DMA controller chips */
> @@ -224,6 +229,8 @@ module_param_named(number_of_msix, ipr_number_of_msix, int, 0);
>  MODULE_PARM_DESC(number_of_msix, "Specify the number of MSIX interrupts to use on capable adapters (1 - 16). (default:2)");
>  module_param_named(fast_reboot, ipr_fast_reboot, int, S_IRUGO | S_IWUSR);
>  MODULE_PARM_DESC(fast_reboot, "Skip adapter shutdown during reboot. Set to 1 to enable. (default: 0)");
> +module_param_named(cpu_map, ipr_cpu_map, int, 0);
> +MODULE_PARM_DESC(cpu_map, "Defines how to map CPUs to IRQ vectors per adapter");

Where are we going to document the syntax for this parameter?
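If nowhere else, the accepted values could at least go in the help text itself. One possible wording (the value meanings below are inferred from what this patch implements: 0 = no mapping, 1 = one IRQ vector per CPU):

    module_param_named(cpu_map, ipr_cpu_map, int, 0);
    MODULE_PARM_DESC(cpu_map,
                     "Defines how to map IRQ vectors to CPUs per adapter. "
                     "0: no mapping (default), 1: bind each MSI-X vector to one CPU");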
>  MODULE_LICENSE("GPL");
>  MODULE_VERSION(IPR_DRIVER_VERSION);
>
> @@ -1053,6 +1060,18 @@ static void ipr_send_blocking_cmd(struct ipr_cmnd *ipr_cmd,
>  static int ipr_get_hrrq_index(struct ipr_ioa_cfg *ioa_cfg)
>  {
>  	unsigned int hrrq;
> +	struct ipr_vector_map_info *map_per_cpu;
> +	u32 cpu;
> +
> +	if (ioa_cfg->set_cpu_affinity == IPR_SET_IO_CPU_AFFINITY
> +	    && ioa_cfg->cpu_map && ioa_cfg->hrrq_num > 1) {
> +		cpu = smp_processor_id();
> +		map_per_cpu = ioa_cfg->cpu_map_tbl;
> +		if (cpu < ioa_cfg->possible_cpu_cnt) {
> +			map_per_cpu += cpu;
> +			return map_per_cpu->hrrq_id;
> +		}
> +	}

I'm thinking we should be taking into account the NUMA topology here. Seems like we'd want the affinity for an IRQ to be bound to a core and not a CPU. That way it could fire on any available SMT thread. It seems like what the code here does now is done per SMT thread, so on a P8, which has 8 threads per core, don't we end up running everything on the first 2 cores? Or am I not looking close enough at the code?
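For illustration, here is a rough, untested sketch of what I mean. The helper name is made up; topology_sibling_cpumask() returns the mask of all SMT threads sharing a core with the given CPU, so the interrupt could then fire on whichever thread of that core is free:

    /* Hint the vector at every SMT thread of the core containing
     * first_cpu, instead of pinning it to one logical CPU.
     */
    static void ipr_hint_vector_to_core(struct ipr_ioa_cfg *ioa_cfg,
                                        int vec_idx, unsigned int first_cpu)
    {
            const struct cpumask *core = topology_sibling_cpumask(first_cpu);

            irq_set_affinity_hint(ioa_cfg->vectors_info[vec_idx].vec, core);
    }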
Thanks,

Brian

>
>  	if (ioa_cfg->hrrq_num == 1)
>  		hrrq = 0;
> @@ -4086,6 +4105,77 @@ static struct device_attribute ipr_ioa_fw_type_attr = {
>  	.show = ipr_show_fw_type
>  };
>
> +static ssize_t
> +ipr_show_cpu_map_tbl(struct device *dev, struct device_attribute *attr,
> +		     char *buf)
> +{
> +	struct Scsi_Host *shost = class_to_shost(dev);
> +	struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata;
> +	struct ipr_vector_map_info *map_per_cpu;
> +	int len = 0, show_current_cpu = 0;
> +
> +	if (ioa_cfg->intr_flag != IPR_USE_MSIX)
> +		return len;
> +
> +	switch (ioa_cfg->cpu_map) {
> +	case 0:
> +		len += snprintf(buf + len, PAGE_SIZE-len,
> +				"ipr driver cpu map: No mapping (%d)\n",
> +				ioa_cfg->cpu_map);
> +		return len;
> +	case 1:
> +		len += snprintf(buf + len, PAGE_SIZE-len,
> +				"ipr driver cpu map: IRQ vector to CPU mapping (%d): "
> +				"%d online CPUs\n",
> +				ioa_cfg->cpu_map,
> +				num_online_cpus());
> +		break;
> +	}
> +
> +	while (show_current_cpu < ioa_cfg->possible_cpu_cnt) {
> +		map_per_cpu = &ioa_cfg->cpu_map_tbl[show_current_cpu];
> +
> +		if (map_per_cpu->irq_id == IPR_VECTOR_MAP_EMPTY)
> +			len += snprintf(buf + len, PAGE_SIZE-len,
> +					"CPU %02d io_hrrq %02d\n",
> +					map_per_cpu->cpu_id,
> +					map_per_cpu->hrrq_id);
> +		else
> +			len += snprintf(buf + len, PAGE_SIZE-len,
> +					"CPU %02d io_hrrq %02d "
> +					"IRQ=%d\n", map_per_cpu->cpu_id,
> +					map_per_cpu->hrrq_id,
> +					map_per_cpu->irq_id);
> +
> +		show_current_cpu++;
> +
> +		if (show_current_cpu < ioa_cfg->possible_cpu_cnt
> +		    && (len >= (PAGE_SIZE - 64))) {
> +			len += snprintf(buf + len, PAGE_SIZE-len, "more...\n");
> +			break;
> +		}
> +	}
> +
> +	return len;
> +}
> +
> +static ssize_t
> +ipr_store_cpu_map_tbl(struct device *dev, struct device_attribute *attr,
> +		      const char *buf, size_t count)
> +{
> +	int status = -EINVAL;
> +	return status;
> +}
> +
> +static struct device_attribute ipr_cpu_map_attr = {
> +	.attr = {
> +		.name = "cpu_map",
> +		.mode = S_IRUGO | S_IWUSR,
> +	},
> +	.show = ipr_show_cpu_map_tbl,
> +	.store = ipr_store_cpu_map_tbl
> +};
> +
>  static struct device_attribute *ipr_ioa_attrs[] = {
>  	&ipr_fw_version_attr,
>  	&ipr_log_level_attr,
> @@ -4095,6 +4185,7 @@ static struct device_attribute *ipr_ioa_attrs[] = {
>  	&ipr_update_fw_attr,
>  	&ipr_ioa_fw_type_attr,
>  	&ipr_iopoll_weight_attr,
> +	&ipr_cpu_map_attr,
>  	NULL,
>  };
>
> @@ -9333,6 +9424,8 @@ static void ipr_free_mem(struct ipr_ioa_cfg *ioa_cfg)
>
>  	ipr_free_dump(ioa_cfg);
>  	kfree(ioa_cfg->trace);
> +	if (ioa_cfg->cpu_map)
> +		kfree(ioa_cfg->cpu_map_tbl);
>  }
>
>  /**
> @@ -9352,9 +9445,13 @@ static void ipr_free_irqs(struct ipr_ioa_cfg *ioa_cfg)
>  	if (ioa_cfg->intr_flag == IPR_USE_MSI ||
>  	    ioa_cfg->intr_flag == IPR_USE_MSIX) {
>  		int i;
> -		for (i = 0; i < ioa_cfg->nvectors; i++)
> +		for (i = 0; i < ioa_cfg->nvectors; i++) {
> +			if (ioa_cfg->cpu_map)
> +				irq_set_affinity_hint(
> +					ioa_cfg->vectors_info[i].vec, NULL);
>  			free_irq(ioa_cfg->vectors_info[i].vec,
>  				 &ioa_cfg->hrrq[i]);
> +		}
>  	} else
>  		free_irq(pdev->irq, &ioa_cfg->hrrq[0]);
>
> @@ -9585,11 +9682,35 @@ static int ipr_alloc_mem(struct ipr_ioa_cfg *ioa_cfg)
>  	if (!ioa_cfg->trace)
>  		goto out_free_hostrcb_dma;
>
> +	if (ioa_cfg->cpu_map) {
> +		ioa_cfg->cpu_map_tbl =
> +			kzalloc(sizeof(struct ipr_vector_map_info) *
> +				ioa_cfg->possible_cpu_cnt, GFP_KERNEL);
> +
> +		if (!ioa_cfg->cpu_map_tbl)
> +			goto out_free_trace;
> +	}
> +
> +	if (ipr_used_cpu == NULL) {
> +		ipr_used_cpu = kzalloc((sizeof(uint16_t) *
> +					ipr_possible_cpu_cnt), GFP_KERNEL);
> +		if (!ipr_used_cpu)
> +			goto out_free_cpu_map_tbl;
> +
> +		for (i = 0; i < ipr_possible_cpu_cnt; i++)
> +			ipr_used_cpu[i] = IPR_VECTOR_MAP_EMPTY;
> +	}
> +
>  	rc = 0;
> out:
>  	LEAVE;
>  	return rc;
>
> +out_free_cpu_map_tbl:
> +	if (ioa_cfg->cpu_map)
> +		kfree(ioa_cfg->cpu_map_tbl);
> +out_free_trace:
> +	kfree(ioa_cfg->trace);
>  out_free_hostrcb_dma:
>  	while (i-- > 0) {
>  		dma_free_coherent(&pdev->dev, sizeof(struct ipr_hostrcb),
> @@ -9791,6 +9912,94 @@ static void ipr_wait_for_pci_err_recovery(struct ipr_ioa_cfg *ioa_cfg)
>  	}
>  }
>
> +static int
> +ipr_find_next_free_cpu(struct ipr_ioa_cfg *ioa_cfg, uint32_t cpu_id)
> +{
> +	struct ipr_vector_map_info *map_per_cpu;
> +	int i;
> +
> +	map_per_cpu = ioa_cfg->cpu_map_tbl;
> +	for (i = 0; i < ioa_cfg->possible_cpu_cnt; i++) {
> +		if (cpu_online(i)) {
> +			if ((map_per_cpu->irq_id == IPR_VECTOR_MAP_EMPTY) &&
> +			    (ipr_used_cpu[i] == IPR_VECTOR_MAP_EMPTY) &&
> +			    (map_per_cpu->cpu_id == cpu_id))
> +				return i;
> +		}
> +		map_per_cpu++;
> +	}
> +
> +	return IPR_VECTOR_MAP_EMPTY;
> +}
> +
> +static int ipr_set_cpu_affinity_for_hrrq(struct ipr_ioa_cfg *ioa_cfg,
> +					 int vectors)
> +{
> +	struct ipr_vector_map_info *map_per_cpu;
> +	u32 cpu_idx = 0, hrrq_num = 0, next_cpu_id, i;
> +
> +	if (!ioa_cfg->cpu_map)
> +		return 1;
> +
> +	memset(ioa_cfg->cpu_map_tbl, 0xff,
> +	       (sizeof(struct ipr_vector_map_info) *
> +		ioa_cfg->possible_cpu_cnt));
> +
> +	map_per_cpu = ioa_cfg->cpu_map_tbl;
> +	for (i = 0; i < ioa_cfg->possible_cpu_cnt; i++, map_per_cpu++)
> +		map_per_cpu->cpu_id = i;
> +
> +	for (i = 0; i < ipr_possible_cpu_cnt; i++) {
> +		if (ipr_used_cpu[i] == IPR_VECTOR_MAP_EMPTY) {
> +			cpu_idx = i;
> +			break;
> +		}
> +	}
> +
> +	hrrq_num = 0;
> +	for (i = 0; i < vectors; i++) {
> +		map_per_cpu = ioa_cfg->cpu_map_tbl;
> +		next_cpu_id = ipr_find_next_free_cpu(ioa_cfg, cpu_idx);
> +		if (next_cpu_id == IPR_VECTOR_MAP_EMPTY) {
> +			int j;
> +
> +			for (j = 0; j < i; j++)
> +				irq_set_affinity_hint(
> +					ioa_cfg->vectors_info[j].vec, NULL);
> +			ioa_cfg->cpu_map = 0;
> +			dev_info(&ioa_cfg->pdev->dev,
> +				 "Cannot set cpu affinity\n");
> +			return 0;
> +		}
> +		map_per_cpu += next_cpu_id;
> +		ipr_used_cpu[next_cpu_id] = next_cpu_id;
> +
> +		map_per_cpu->irq_id = ioa_cfg->vectors_info[i].vec;
> +		map_per_cpu->hrrq_id = hrrq_num;
> +		irq_set_affinity_hint(ioa_cfg->vectors_info[i].vec,
> +				      get_cpu_mask(next_cpu_id));
> +		cpu_idx++;
> +		hrrq_num++;
> +	}
> +
> +	hrrq_num = 0;
> +	if (vectors < ioa_cfg->possible_cpu_cnt) {
> +		map_per_cpu = ioa_cfg->cpu_map_tbl;
> +		for (i = 0; i < ioa_cfg->possible_cpu_cnt; i++) {
> +			if (map_per_cpu->hrrq_id == IPR_VECTOR_MAP_EMPTY) {
> +				map_per_cpu->hrrq_id = hrrq_num;
> +				hrrq_num++;
> +			}
> +			if (hrrq_num == ioa_cfg->hrrq_num)
> +				hrrq_num = 0;
> +			map_per_cpu++;
> +		}
> +	}
> +
> +	ioa_cfg->set_cpu_affinity = IPR_SET_IO_CPU_AFFINITY;
> +
> +	return 1;
> +}
> +
>  static int ipr_enable_msix(struct ipr_ioa_cfg *ioa_cfg)
>  {
>  	struct msix_entry entries[IPR_MAX_MSIX_VECTORS];
> @@ -10045,6 +10254,7 @@ static int ipr_probe_ioa(struct pci_dev *pdev,
>  	ioa_cfg->hdw_dma_regs = ipr_regs;
>  	ioa_cfg->hdw_dma_regs_pci = ipr_regs_pci;
>  	ioa_cfg->ioa_mailbox = ioa_cfg->chip_cfg->mailbox + ipr_regs;
> +	ioa_cfg->cpu_map = ipr_cpu_map;
>
>  	ipr_init_regs(ioa_cfg);
>
> @@ -10106,6 +10316,8 @@ static int ipr_probe_ioa(struct pci_dev *pdev,
>  		}
>  	}
>
> +	ioa_cfg->possible_cpu_cnt = ipr_possible_cpu_cnt;
> +
>  	if (ioa_cfg->intr_flag == IPR_USE_MSI ||
>  	    ioa_cfg->intr_flag == IPR_USE_MSIX) {
>  		rc = ipr_test_msi(ioa_cfg, pdev);
> @@ -10200,6 +10412,9 @@ static int ipr_probe_ioa(struct pci_dev *pdev,
>  		goto cleanup_nolog;
>  	}
>
> +	if (ioa_cfg->cpu_map)
> +		ipr_set_cpu_affinity_for_hrrq(ioa_cfg, ioa_cfg->nvectors);
> +
>  	if ((dev_id->driver_data & IPR_USE_PCI_WARM_RESET) ||
>  	    (dev_id->device == PCI_DEVICE_ID_IBM_OBSIDIAN_E && !ioa_cfg->revid)) {
>  		ioa_cfg->needs_warm_reset = 1;
> @@ -10644,9 +10859,14 @@ static struct notifier_block ipr_notifier = {
>  **/
>  static int __init ipr_init(void)
>  {
> +	int cpu;
>  	ipr_info("IBM Power RAID SCSI Device Driver version: %s %s\n",
>  		 IPR_DRIVER_VERSION, IPR_DRIVER_DATE);
>
> +	ipr_used_cpu = NULL;
> +	for_each_present_cpu(cpu)
> +		ipr_possible_cpu_cnt++;
> +
>  	register_reboot_notifier(&ipr_notifier);
>  	return pci_register_driver(&ipr_driver);
>  }
> @@ -10663,6 +10883,7 @@ static void __exit ipr_exit(void)
>  {
>  	unregister_reboot_notifier(&ipr_notifier);
>  	pci_unregister_driver(&ipr_driver);
> +	kfree(ipr_used_cpu);
>  }
>
>  module_init(ipr_init);
> diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h
> index a34c7a5..c28a673 100644
> --- a/drivers/scsi/ipr.h
> +++ b/drivers/scsi/ipr.h
> @@ -1464,6 +1464,16 @@ enum ipr_sdt_state {
>  	DUMP_OBTAINED
>  };
>
> +#define IPR_VECTOR_MAP_EMPTY	0xffff
> +#define IPR_SET_IO_CPU_AFFINITY	0x1
> +
> +/* IRQ vector to CPU mapping when set CPU affinity */
> +struct ipr_vector_map_info {
> +	u16	cpu_id;
> +	u16	irq_id;
> +	u16	hrrq_id;
> +};
> +
>  /* Per-controller data */
>  struct ipr_ioa_cfg {
>  	char eye_catcher[8];
> @@ -1595,6 +1605,13 @@ struct ipr_ioa_cfg {
>
>  	u32 iopoll_weight;
>
> +	u32 cpu_map;
> +	struct ipr_vector_map_info *cpu_map_tbl;
> +	u16 possible_cpu_cnt;
> +	u16 set_cpu_affinity;
> +
> +	u32 use_blk_mq;
> +
>  }; /* struct ipr_ioa_cfg */
>
>  struct ipr_cmnd {

-- 
Brian King
Power Linux I/O
IBM Linux Technology Center