[PATCH] net/ethernet/freescale: Fix incorrect IS_ERR_VALUE macro usages
IS_ERR_VALUE macro should be used only with unsigned long type. Especially it works incorrectly with unsigned shorter types on 64bit machines. Fixes: 4c35630ccda5 ("[POWERPC] Change rheap functions to use ulongs instead of pointers") Signed-off-by: Wei Li --- drivers/net/ethernet/freescale/ucc_geth.c | 30 +++ 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 714b501be7d0..8656d9be256a 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -286,7 +286,7 @@ static int fill_init_enet_entries(struct ucc_geth_private *ugeth, else { init_enet_offset = qe_muram_alloc(thread_size, thread_alignment); - if (IS_ERR_VALUE(init_enet_offset)) { + if (IS_ERR_VALUE((unsigned long)(int)init_enet_offset)) { if (netif_msg_ifup(ugeth)) pr_err("Can not allocate DPRAM memory\n"); qe_put_snum((u8) snum); @@ -2223,7 +2223,7 @@ static int ucc_geth_alloc_tx(struct ucc_geth_private *ugeth) ugeth->tx_bd_ring_offset[j] = qe_muram_alloc(length, UCC_GETH_TX_BD_RING_ALIGNMENT); - if (!IS_ERR_VALUE(ugeth->tx_bd_ring_offset[j])) + if (!IS_ERR_VALUE((unsigned long)(int)ugeth->tx_bd_ring_offset[j])) ugeth->p_tx_bd_ring[j] = (u8 __iomem *) qe_muram_addr(ugeth-> tx_bd_ring_offset[j]); @@ -2300,7 +2300,7 @@ static int ucc_geth_alloc_rx(struct ucc_geth_private *ugeth) ugeth->rx_bd_ring_offset[j] = qe_muram_alloc(length, UCC_GETH_RX_BD_RING_ALIGNMENT); - if (!IS_ERR_VALUE(ugeth->rx_bd_ring_offset[j])) + if (!IS_ERR_VALUE((unsigned long)(int)ugeth->rx_bd_ring_offset[j])) ugeth->p_rx_bd_ring[j] = (u8 __iomem *) qe_muram_addr(ugeth-> rx_bd_ring_offset[j]); @@ -2510,7 +2510,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) ugeth->tx_glbl_pram_offset = qe_muram_alloc(sizeof(struct ucc_geth_tx_global_pram), UCC_GETH_TX_GLOBAL_PRAM_ALIGNMENT); - if (IS_ERR_VALUE(ugeth->tx_glbl_pram_offset)) { + if (IS_ERR_VALUE((unsigned 
long)(int)ugeth->tx_glbl_pram_offset)) { if (netif_msg_ifup(ugeth)) pr_err("Can not allocate DPRAM memory for p_tx_glbl_pram\n"); return -ENOMEM; @@ -2530,7 +2530,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) sizeof(struct ucc_geth_thread_data_tx) + 32 * (numThreadsTxNumerical == 1), UCC_GETH_THREAD_DATA_ALIGNMENT); - if (IS_ERR_VALUE(ugeth->thread_dat_tx_offset)) { + if (IS_ERR_VALUE((unsigned long)(int)ugeth->thread_dat_tx_offset)) { if (netif_msg_ifup(ugeth)) pr_err("Can not allocate DPRAM memory for p_thread_data_tx\n"); return -ENOMEM; @@ -2557,7 +2557,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) qe_muram_alloc(ug_info->numQueuesTx * sizeof(struct ucc_geth_send_queue_qd), UCC_GETH_SEND_QUEUE_QUEUE_DESCRIPTOR_ALIGNMENT); - if (IS_ERR_VALUE(ugeth->send_q_mem_reg_offset)) { + if (IS_ERR_VALUE((unsigned long)(int)ugeth->send_q_mem_reg_offset)) { if (netif_msg_ifup(ugeth)) pr_err("Can not allocate DPRAM memory for p_send_q_mem_reg\n"); return -ENOMEM; @@ -2597,7 +2597,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) ugeth->scheduler_offset = qe_muram_alloc(sizeof(struct ucc_geth_scheduler), UCC_GETH_SCHEDULER_ALIGNMENT); - if (IS_ERR_VALUE(ugeth->scheduler_offset)) { + if (IS_ERR_VALUE((unsigned long)(int)ugeth->scheduler_offset)) { if (netif_msg_ifup(ugeth)) pr_err("Can not allocate DPRAM memory for p_scheduler\n"); return -ENOMEM; @@ -2644,7 +2644,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) qe_muram_alloc(sizeof (struct ucc_geth_tx_firmware_statistics_pram),
[PATCH] net: fs_enet: Fix incorrect IS_ERR_VALUE macro usages
IS_ERR_VALUE macro should be used only with unsigned long type. Especially it works incorrectly with unsigned shorter types on 64bit machines. Fixes: 976de6a8c304 ("fs_enet: Be an of_platform device when CONFIG_PPC_CPM_NEW_BINDING is set.") Fixes: 4c35630ccda5 ("[POWERPC] Change rheap functions to use ulongs instead of pointers") Signed-off-by: Wei Li --- drivers/net/ethernet/freescale/fs_enet/mac-fcc.c | 2 +- drivers/net/ethernet/freescale/fs_enet/mac-scc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c index b47490be872c..e2117ad46130 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c @@ -107,7 +107,7 @@ static int do_pd_setup(struct fs_enet_private *fep) fep->fcc.mem = (void __iomem *)cpm2_immr; fpi->dpram_offset = cpm_dpalloc(128, 32); - if (IS_ERR_VALUE(fpi->dpram_offset)) { + if (IS_ERR_VALUE((unsigned long)(int)fpi->dpram_offset)) { ret = fpi->dpram_offset; goto out_fcccp; } diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c index 64300ac13e02..90f82df0b1bb 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c @@ -136,7 +136,7 @@ static int allocate_bd(struct net_device *dev) fep->ring_mem_addr = cpm_dpalloc((fpi->tx_ring + fpi->rx_ring) * sizeof(cbd_t), 8); - if (IS_ERR_VALUE(fep->ring_mem_addr)) + if (IS_ERR_VALUE((unsigned long)(int)fep->ring_mem_addr)) return -ENOMEM; fep->ring_base = (void __iomem __force*) -- 2.17.1
Re: [PATCH v2 00/19] Add generic vdso_base tracking
Le 24/11/2020 à 01:29, Dmitry Safonov a écrit : v2 Changes: - Rename user_landing to vdso_base as it tracks vDSO VMA start address, rather than the explicit address to land (Andy) - Reword and don't use "new-execed" and "new-born" task (Andy) - Fix failures reported by build robot Started from discussion [1], where was noted that currently a couple of architectures support mremap() for vdso/sigpage, but not munmap(). If an application maps something on the ex-place of vdso/sigpage, later after processing signal it will land there (good luck!) Patches set is based on linux-next (next-20201123) and it depends on changes in x86/cleanups (those reclaim TIF_IA32/TIF_X32) and also on my changes in akpm (fixing several mremap() issues). I have a series that cleans up VDSO init on powerpc and migrates powerpc to _install_special_mapping() (patch 10 of the series). https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=204396=%2A=both I'm wondering how we should coordinate with your series for merging. I guess your series will also imply removal of arch_unmap() ? see https://elixir.bootlin.com/linux/v5.10-rc4/source/arch/powerpc/include/asm/mmu_context.h#L262 Logically, the patches set divides on: - patch 1: a cleanup for patches in x86/cleanups - patches 2-11: cleanups for arch_setup_additional_pages() - patches 12-13: x86 signal changes for unmapped vdso - patches 14-19: provide generic vdso_base in mm_struct In the end, besides cleanups, it's now more predictable what happens for applications with unmapped vdso on architectures those support .mremap() for vdso/sigpage. I'm aware of only one user that unmaps vdso - Valgrind [2]. 
(there possibly are more, but this one is "special", it unmaps vdso, but not vvar, which confuses CRIU [Checkpoint Restore In Userspace], that's why I'm aware of it) Patches as a .git branch: https://github.com/0x7f454c46/linux/tree/setup_additional_pages-v2 v1 Link: https://lore.kernel.org/lkml/20201108051730.2042693-1-d...@arista.com/ [1]: https://lore.kernel.org/linux-arch/cajwjo6zanqykshbq+3b+fi_vt80mtrzev5yreqawx-l8j8x...@mail.gmail.com/ [2]: https://github.com/checkpoint-restore/criu/issues/488 Christophe
[PATCH kernel v4 8/8] powerpc/pci: Remove LSI mappings on device teardown
From: Oliver O'Halloran When a passthrough IO adapter is removed from a pseries machine using hash MMU and the XIVE interrupt mode, the POWER hypervisor expects the guest OS to clear all page table entries related to the adapter. If some are still present, the RTAS call which isolates the PCI slot returns error 9001 "valid outstanding translations" and the removal of the IO adapter fails. This is because when the PHBs are scanned, Linux maps automatically the INTx interrupts in the Linux interrupt number space but these are never removed. This problem can be fixed by adding the corresponding unmap operation when the device is removed. There's no pcibios_* hook for the remove case, but the same effect can be achieved using a bus notifier. Signed-off-by: Oliver O'Halloran Reviewed-by: Cédric Le Goater Signed-off-by: Alexey Kardashevskiy --- arch/powerpc/kernel/pci-common.c | 21 + 1 file changed, 21 insertions(+) diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index be108616a721..95f4e173368a 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -404,6 +404,27 @@ static int pci_read_irq_line(struct pci_dev *pci_dev) return 0; } +static int ppc_pci_unmap_irq_line(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct pci_dev *pdev = to_pci_dev(data); + + if (action == BUS_NOTIFY_DEL_DEVICE) + irq_dispose_mapping(pdev->irq); + + return NOTIFY_DONE; +} + +static struct notifier_block ppc_pci_unmap_irq_notifier = { + .notifier_call = ppc_pci_unmap_irq_line, +}; + +static int ppc_pci_register_irq_notifier(void) +{ + return bus_register_notifier(_bus_type, _pci_unmap_irq_notifier); +} +arch_initcall(ppc_pci_register_irq_notifier); + /* * Platform support for /proc/bus/pci/X/Y mmap()s. * -- paulus. -- 2.17.1
[PATCH kernel v4 6/8] genirq/irqdomain: Move hierarchical IRQ cleanup to kobject_release
This moves hierarchical domain's irqs cleanup into the kobject release hook to make irq_domain_free_irqs() as simple as kobject_put. Signed-off-by: Alexey Kardashevskiy --- kernel/irq/irqdomain.c | 43 +- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 4779d912bb86..a0a81cc6c524 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -863,21 +863,9 @@ EXPORT_SYMBOL_GPL(irq_create_of_mapping); */ void irq_dispose_mapping(unsigned int virq) { - struct irq_data *irq_data = irq_get_irq_data(virq); - struct irq_domain *domain; + struct irq_desc *desc = irq_to_desc(virq); - if (!virq || !irq_data) - return; - - domain = irq_data->domain; - if (WARN_ON(domain == NULL)) - return; - - if (irq_domain_is_hierarchy(domain)) { - irq_domain_free_irqs(virq, 1); - } else { - irq_free_desc(virq); - } + kobject_put(>kobj); } EXPORT_SYMBOL_GPL(irq_dispose_mapping); @@ -1396,6 +1384,19 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, return domain->ops->alloc(domain, irq_base, nr_irqs, arg); } +static void irq_domain_hierarchy_free_desc(struct irq_desc *desc) +{ + unsigned int virq = desc->irq_data.irq; + struct irq_data *data = irq_get_irq_data(virq); + + mutex_lock(_domain_mutex); + irq_domain_remove_irq(virq); + irq_domain_free_irqs_hierarchy(data->domain, virq, 1); + mutex_unlock(_domain_mutex); + + irq_domain_free_irq_data(virq, 1); +} + int __irq_domain_alloc_irqs_data(struct irq_domain *domain, int virq, unsigned int nr_irqs, int node, void *arg, const struct irq_affinity_desc *affinity) @@ -1430,7 +1431,10 @@ int __irq_domain_alloc_irqs_data(struct irq_domain *domain, int virq, } for (i = 0; i < nr_irqs; i++) { + struct irq_desc *desc = irq_to_desc(virq + i); + irq_domain_insert_irq(virq + i); + desc->free_irq = irq_domain_hierarchy_free_desc; } mutex_unlock(_domain_mutex); @@ -1675,14 +1679,11 @@ void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs) "NULL pointer, 
cannot free irq\n")) return; - mutex_lock(&irq_domain_mutex); - for (i = 0; i < nr_irqs; i++) - irq_domain_remove_irq(virq + i); - irq_domain_free_irqs_hierarchy(data->domain, virq, nr_irqs); - mutex_unlock(&irq_domain_mutex); + for (i = 0; i < nr_irqs; i++) { + struct irq_desc *desc = irq_to_desc(virq + i); - irq_domain_free_irq_data(virq, nr_irqs); - irq_free_descs(virq, nr_irqs); + kobject_put(&desc->kobj); + } } /** -- 2.17.1
[PATCH kernel v4 5/8] genirq: Add free_irq hook for IRQ descriptor and use for mapping disposal
We want to make the irq_desc.kobj's release hook free associated resources but we do not want to pollute the irqdesc code with domains. This adds a free_irq hook which is called when the last reference to the descriptor is dropped. The first user is mapped irqs. This potentially can break the existing users; however they seem to do the right thing and call dispose once per mapping. Signed-off-by: Alexey Kardashevskiy --- include/linux/irqdesc.h| 1 + include/linux/irqdomain.h | 2 -- include/linux/irqhandler.h | 1 + kernel/irq/irqdesc.c | 3 +++ kernel/irq/irqdomain.c | 14 -- 5 files changed, 17 insertions(+), 4 deletions(-) diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 5745491303e0..6d44cb6a20ad 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -57,6 +57,7 @@ struct irq_desc { struct irq_data irq_data; unsigned int __percpu *kstat_irqs; irq_flow_handler_t handle_irq; + irq_free_handler_t free_irq; struct irqaction*action;/* IRQ action list */ unsigned intstatus_use_accessors; unsigned intcore_internal_state__do_not_mess_with_it; diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index a353b93ddf9e..ccca87cd3d15 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -381,8 +381,6 @@ extern int irq_domain_associate(struct irq_domain *domain, unsigned int irq, extern void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base, irq_hw_number_t hwirq_base, int count); -extern void irq_domain_disassociate(struct irq_domain *domain, - unsigned int irq); extern unsigned int irq_create_mapping(struct irq_domain *host, irq_hw_number_t hwirq); diff --git a/include/linux/irqhandler.h b/include/linux/irqhandler.h index c30f454a9518..3dbc2bb764f3 100644 --- a/include/linux/irqhandler.h +++ b/include/linux/irqhandler.h @@ -10,5 +10,6 @@ struct irq_desc; struct irq_data; typedefvoid (*irq_flow_handler_t)(struct irq_desc *desc); +typedefvoid (*irq_free_handler_t)(struct irq_desc *desc); 
#endif diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 75374b7944b5..071363da8688 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -427,6 +427,9 @@ static void irq_kobj_release(struct kobject *kobj) struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); unsigned int irq = desc->irq_data.irq; + if (desc->free_irq) + desc->free_irq(desc); + irq_remove_debugfs_entry(desc); unregister_irq_proc(irq, desc); diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 805478f81d96..4779d912bb86 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -485,7 +485,7 @@ static void irq_domain_set_mapping(struct irq_domain *domain, } } -void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq) +static void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq) { struct irq_data *irq_data = irq_get_irq_data(irq); irq_hw_number_t hwirq; @@ -582,6 +582,13 @@ void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base, } EXPORT_SYMBOL_GPL(irq_domain_associate_many); +static void irq_mapped_free_desc(struct irq_desc *desc) +{ + unsigned int virq = desc->irq_data.irq; + + irq_domain_disassociate(desc->irq_data.domain, virq); +} + /** * irq_create_direct_mapping() - Allocate an irq for direct mapping * @domain: domain to allocate the irq for or NULL for default domain @@ -638,6 +645,7 @@ unsigned int irq_create_mapping(struct irq_domain *domain, { struct device_node *of_node; int virq; + struct irq_desc *desc; pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq); @@ -674,6 +682,9 @@ unsigned int irq_create_mapping(struct irq_domain *domain, pr_debug("irq %lu on domain %s mapped to virtual irq %u\n", hwirq, of_node_full_name(of_node), virq); + desc = irq_to_desc(virq); + desc->free_irq = irq_mapped_free_desc; + return virq; } EXPORT_SYMBOL_GPL(irq_create_mapping); @@ -865,7 +876,6 @@ void irq_dispose_mapping(unsigned int virq) if (irq_domain_is_hierarchy(domain)) { 
irq_domain_free_irqs(virq, 1); } else { - irq_domain_disassociate(domain, virq); irq_free_desc(virq); } } -- 2.17.1
[PATCH kernel v4 3/8] genirq/irqdomain: Drop unused realloc parameter from __irq_domain_alloc_irqs
The two previous patches made @realloc obsolete. This finishes removing it. Signed-off-by: Alexey Kardashevskiy --- include/linux/irqdomain.h | 4 +--- arch/x86/kernel/apic/io_apic.c | 2 +- drivers/gpio/gpiolib.c | 1 - drivers/irqchip/irq-armada-370-xp.c | 2 +- drivers/irqchip/irq-bcm2836.c | 3 +-- drivers/irqchip/irq-gic-v3.c| 3 +-- drivers/irqchip/irq-gic-v4.c| 6 ++ drivers/irqchip/irq-gic.c | 3 +-- drivers/irqchip/irq-ixp4xx.c| 1 - kernel/irq/ipi.c| 2 +- kernel/irq/irqdomain.c | 4 +--- kernel/irq/msi.c| 2 +- 12 files changed, 11 insertions(+), 22 deletions(-) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 6cc37bba9951..a353b93ddf9e 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -475,7 +475,6 @@ extern int __irq_domain_alloc_irqs_data(struct irq_domain *domain, int virq, const struct irq_affinity_desc *affinity); extern int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, unsigned int nr_irqs, int node, void *arg, - bool realloc, const struct irq_affinity_desc *affinity); extern void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs); extern int irq_domain_activate_irq(struct irq_data *irq_data, bool early); @@ -484,8 +483,7 @@ extern void irq_domain_deactivate_irq(struct irq_data *irq_data); static inline int irq_domain_alloc_irqs(struct irq_domain *domain, unsigned int nr_irqs, int node, void *arg) { - return __irq_domain_alloc_irqs(domain, -1, nr_irqs, node, arg, false, - NULL); + return __irq_domain_alloc_irqs(domain, -1, nr_irqs, node, arg, NULL); } extern int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index df9c0ab3a119..5b45f0874571 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -973,7 +973,7 @@ static int alloc_irq_from_domain(struct irq_domain *domain, int ioapic, u32 gsi, if (irq == -1 || !legacy) return __irq_domain_alloc_irqs(domain, irq, 1, 
ioapic_alloc_attr_node(info), - info, false, NULL); + info, NULL); return __irq_domain_alloc_irqs_data(domain, irq, 1, ioapic_alloc_attr_node(info), diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 089ddcaa9bc6..b7cfecb5c701 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1059,7 +1059,6 @@ static void gpiochip_set_hierarchical_irqchip(struct gpio_chip *gc, 1, NUMA_NO_NODE, , - false, NULL); if (ret < 0) { chip_err(gc, diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index d7eb2e93db8f..bf17eb312669 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -431,7 +431,7 @@ static __init void armada_xp_ipi_init(struct device_node *node) irq_domain_update_bus_token(ipi_domain, DOMAIN_BUS_IPI); base_ipi = __irq_domain_alloc_irqs(ipi_domain, -1, IPI_DOORBELL_END, - NUMA_NO_NODE, NULL, false, NULL); + NUMA_NO_NODE, NULL, NULL); if (WARN_ON(!base_ipi)) return; diff --git a/drivers/irqchip/irq-bcm2836.c b/drivers/irqchip/irq-bcm2836.c index cbc7c740e4dc..fe9ff90940d3 100644 --- a/drivers/irqchip/irq-bcm2836.c +++ b/drivers/irqchip/irq-bcm2836.c @@ -269,8 +269,7 @@ static void __init bcm2836_arm_irqchip_smp_init(void) irq_domain_update_bus_token(ipi_domain, DOMAIN_BUS_IPI); base_ipi = __irq_domain_alloc_irqs(ipi_domain, -1, BITS_PER_MBOX, - NUMA_NO_NODE, NULL, - false, NULL); + NUMA_NO_NODE, NULL, NULL); if (WARN_ON(!base_ipi)) return; diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 16fecc0febe8..ff20fd54921f 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1163,8 +1163,7 @@ static void __init gic_smp_init(void) /* Register all 8 non-secure SGIs */ base_sgi =
[PATCH kernel v4 7/8] genirq/irqdomain: Reference irq_desc for already mapped irqs
This references an irq_desc if already mapped interrupt requested to map again. This happends for PCI legacy interrupts where 4 interrupts are shared among all devices on the same PCI host bus adapter. >From now on, the users shall call irq_dispose_mapping() for every irq_create_fwspec_mapping(). Most (all?) users do not bother with disposing though so it is not very likely to break many things. Signed-off-by: Alexey Kardashevskiy --- kernel/irq/irqdomain.c | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index a0a81cc6c524..07f4bde87de5 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -663,7 +663,9 @@ unsigned int irq_create_mapping(struct irq_domain *domain, /* Check if mapping already exists */ virq = irq_find_mapping(domain, hwirq); if (virq) { + desc = irq_to_desc(virq); pr_debug("-> existing mapping on virq %d\n", virq); + kobject_get(>kobj); return virq; } @@ -762,6 +764,7 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec) irq_hw_number_t hwirq; unsigned int type = IRQ_TYPE_NONE; int virq; + struct irq_desc *desc; if (fwspec->fwnode) { domain = irq_find_matching_fwspec(fwspec, DOMAIN_BUS_WIRED); @@ -798,8 +801,11 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec) * current trigger type then we are done so return the * interrupt number. */ - if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq)) + if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq)) { + desc = irq_to_desc(virq); + kobject_get(>kobj); return virq; + } /* * If the trigger type has not been set yet, then set @@ -811,6 +817,8 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec) return 0; irqd_set_trigger_type(irq_data, type); + desc = irq_to_desc(virq); + kobject_get(>kobj); return virq; } -- 2.17.1
[PATCH kernel v4 0/8] genirq/irqdomain: Add reference counting to IRQs
This is another attempt to add reference counting to IRQ descriptors; or - more to the point - reuse already existing kobj from irq_desc. This allows the same IRQ to be used several times (such as legacy PCI INTx) and when disposing those, only the last reference drop clears the hardware mappings. Domains do not add references to irq_desc as the whole point of this exercise is to move actual cleanup in hardware to the last reference drop. This only changes sparse interrupts (no idea about the other case yet). No changelog as it is all completely rewritten. I am still running tests but I hope this demonstrates the idea. Some context from Cedric: The background context for such a need is that the POWER9 and POWER10 processors have a new XIVE interrupt controller which uses MMIO pages for interrupt management. Each interrupt has a pair of pages which are required to be unmapped in some environment, like PHB removal. And so, all interrupts need to be unmmaped. 1/8 .. 3/8 are removing confusing "realloc" which not strictly required but I was touching this anyway and legacy interrupts should probably use the new counting anyway; 4/8 .. 6/8 is reordering irq_desc disposal; 7/8 adds extra references (probably missed other places); 8/8 is the fix for the original XIVE bug; it is here for demonstration. I am cc'ing ppc list so people can pull the patches from that patchworks. This is based on sha1 418baf2c28f3 Linus Torvalds "Linux 5.10-rc5". and pushed out to https://github.com/aik/linux/commits/irqs sha1 3955f97c448242f6a Please comment. Thanks. 
Alexey Kardashevskiy (7): genirq/ipi: Simplify irq_reserve_ipi genirq/irqdomain: Clean legacy IRQ allocation genirq/irqdomain: Drop unused realloc parameter from __irq_domain_alloc_irqs genirq: Free IRQ descriptor via embedded kobject genirq: Add free_irq hook for IRQ descriptor and use for mapping disposal genirq/irqdomain: Move hierarchical IRQ cleanup to kobject_release genirq/irqdomain: Reference irq_desc for already mapped irqs Oliver O'Halloran (1): powerpc/pci: Remove LSI mappings on device teardown include/linux/irqdesc.h | 1 + include/linux/irqdomain.h | 9 +- include/linux/irqhandler.h | 1 + arch/powerpc/kernel/pci-common.c| 21 arch/x86/kernel/apic/io_apic.c | 13 ++- drivers/gpio/gpiolib.c | 1 - drivers/irqchip/irq-armada-370-xp.c | 2 +- drivers/irqchip/irq-bcm2836.c | 3 +- drivers/irqchip/irq-gic-v3.c| 3 +- drivers/irqchip/irq-gic-v4.c| 6 +- drivers/irqchip/irq-gic.c | 3 +- drivers/irqchip/irq-ixp4xx.c| 1 - kernel/irq/ipi.c| 16 +-- kernel/irq/irqdesc.c| 45 +++- kernel/irq/irqdomain.c | 160 +--- kernel/irq/msi.c| 2 +- 16 files changed, 158 insertions(+), 129 deletions(-) -- 2.17.1
[PATCH kernel v4 4/8] genirq: Free IRQ descriptor via embedded kobject
At the moment the IRQ descriptor is freed via the free_desc() helper. We want to add reference counting to IRQ descriptors and there is already kobj embedded into irq_desc which we want to reuse. This shuffles free_desc()/etc to make it simply call kobject_put() and moves all the cleanup into the kobject_release hook. As a bonus, we do not need irq_sysfs_del() as kobj removes itself from sysfs if it knows that it was added. This should cause no behavioral change. Signed-off-by: Alexey Kardashevskiy --- kernel/irq/irqdesc.c | 42 -- 1 file changed, 12 insertions(+), 30 deletions(-) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 1a7723604399..75374b7944b5 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -295,18 +295,6 @@ static void irq_sysfs_add(int irq, struct irq_desc *desc) } } -static void irq_sysfs_del(struct irq_desc *desc) -{ - /* -* If irq_sysfs_init() has not yet been invoked (early boot), then -* irq_kobj_base is NULL and the descriptor was never added. -* kobject_del() complains about a object with no parent, so make -* it conditional. 
-*/ - if (irq_kobj_base) - kobject_del(>kobj); -} - static int __init irq_sysfs_init(void) { struct irq_desc *desc; @@ -337,7 +325,6 @@ static struct kobj_type irq_kobj_type = { }; static void irq_sysfs_add(int irq, struct irq_desc *desc) {} -static void irq_sysfs_del(struct irq_desc *desc) {} #endif /* CONFIG_SYSFS */ @@ -419,39 +406,34 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags, return NULL; } -static void irq_kobj_release(struct kobject *kobj) -{ - struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); - - free_masks(desc); - free_percpu(desc->kstat_irqs); - kfree(desc); -} - static void delayed_free_desc(struct rcu_head *rhp) { struct irq_desc *desc = container_of(rhp, struct irq_desc, rcu); + free_masks(desc); + free_percpu(desc->kstat_irqs); + kfree(desc); +} + +static void free_desc(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + kobject_put(>kobj); } -static void free_desc(unsigned int irq) +static void irq_kobj_release(struct kobject *kobj) { - struct irq_desc *desc = irq_to_desc(irq); + struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); + unsigned int irq = desc->irq_data.irq; irq_remove_debugfs_entry(desc); unregister_irq_proc(irq, desc); /* -* sparse_irq_lock protects also show_interrupts() and -* kstat_irq_usr(). Once we deleted the descriptor from the -* sparse tree we can free it. Access in proc will fail to -* lookup the descriptor. -* * The sysfs entry must be serialized against a concurrent * irq_sysfs_init() as well. */ - irq_sysfs_del(desc); delete_irq_desc(irq); /* -- 2.17.1
[PATCH kernel v4 1/8] genirq/ipi: Simplify irq_reserve_ipi
__irq_domain_alloc_irqs() can already handle virq==-1 and free descriptors if it failed allocating hardware interrupts so let's skip this extra step. Signed-off-by: Alexey Kardashevskiy --- kernel/irq/ipi.c | 16 +++- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/kernel/irq/ipi.c b/kernel/irq/ipi.c index 43e3d1be622c..1b2807318ea9 100644 --- a/kernel/irq/ipi.c +++ b/kernel/irq/ipi.c @@ -75,18 +75,12 @@ int irq_reserve_ipi(struct irq_domain *domain, } } - virq = irq_domain_alloc_descs(-1, nr_irqs, 0, NUMA_NO_NODE, NULL); - if (virq <= 0) { - pr_warn("Can't reserve IPI, failed to alloc descs\n"); - return -ENOMEM; - } - - virq = __irq_domain_alloc_irqs(domain, virq, nr_irqs, NUMA_NO_NODE, - (void *) dest, true, NULL); + virq = __irq_domain_alloc_irqs(domain, -1, nr_irqs, NUMA_NO_NODE, + (void *) dest, false, NULL); if (virq <= 0) { pr_warn("Can't reserve IPI, failed to alloc hw irqs\n"); - goto free_descs; + return -EBUSY; } for (i = 0; i < nr_irqs; i++) { @@ -96,10 +90,6 @@ int irq_reserve_ipi(struct irq_domain *domain, irq_set_status_flags(virq + i, IRQ_NO_BALANCING); } return virq; - -free_descs: - irq_free_descs(virq, nr_irqs); - return -EBUSY; } /** -- 2.17.1
[PATCH kernel v4 2/8] genirq/irqdomain: Clean legacy IRQ allocation
There are 10 users of __irq_domain_alloc_irqs() and only one - IOAPIC - passes realloc==true. There is no obvious reason for handling this specific case in the generic code. This splits out __irq_domain_alloc_irqs_data() to make it clear what IOAPIC does and makes __irq_domain_alloc_irqs() cleaner. This should cause no behavioral change. Signed-off-by: Alexey Kardashevskiy --- include/linux/irqdomain.h | 3 ++ arch/x86/kernel/apic/io_apic.c | 13 +++-- kernel/irq/irqdomain.c | 89 -- 3 files changed, 65 insertions(+), 40 deletions(-) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 71535e87109f..6cc37bba9951 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -470,6 +470,9 @@ static inline struct irq_domain *irq_domain_add_hierarchy(struct irq_domain *par ops, host_data); } +extern int __irq_domain_alloc_irqs_data(struct irq_domain *domain, int virq, + unsigned int nr_irqs, int node, void *arg, + const struct irq_affinity_desc *affinity); extern int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, unsigned int nr_irqs, int node, void *arg, bool realloc, diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 7b3c7e0d4a09..df9c0ab3a119 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -970,9 +970,14 @@ static int alloc_irq_from_domain(struct irq_domain *domain, int ioapic, u32 gsi, return -1; } - return __irq_domain_alloc_irqs(domain, irq, 1, - ioapic_alloc_attr_node(info), - info, legacy, NULL); + if (irq == -1 || !legacy) + return __irq_domain_alloc_irqs(domain, irq, 1, + ioapic_alloc_attr_node(info), + info, false, NULL); + + return __irq_domain_alloc_irqs_data(domain, irq, 1, + ioapic_alloc_attr_node(info), + info, NULL); } /* @@ -1006,7 +1011,7 @@ static int alloc_isa_irq_from_domain(struct irq_domain *domain, return -ENOMEM; } else { info->flags |= X86_IRQ_ALLOC_LEGACY; - irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true, + irq = 
__irq_domain_alloc_irqs_data(domain, irq, 1, node, info, NULL); if (irq >= 0) { irq_data = irq_domain_get_irq_data(domain, irq); diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index cf8b374b892d..ca5c78366c85 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1386,6 +1386,51 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, return domain->ops->alloc(domain, irq_base, nr_irqs, arg); } +int __irq_domain_alloc_irqs_data(struct irq_domain *domain, int virq, +unsigned int nr_irqs, int node, void *arg, +const struct irq_affinity_desc *affinity) +{ + int i, ret; + + if (domain == NULL) { + domain = irq_default_domain; + if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n")) + return -EINVAL; + } + + if (irq_domain_alloc_irq_data(domain, virq, nr_irqs)) { + pr_debug("cannot allocate memory for IRQ%d\n", virq); + ret = -ENOMEM; + goto out_free_irq_data; + } + + mutex_lock(_domain_mutex); + ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg); + if (ret < 0) { + mutex_unlock(_domain_mutex); + goto out_free_irq_data; + } + + for (i = 0; i < nr_irqs; i++) { + ret = irq_domain_trim_hierarchy(virq + i); + if (ret) { + mutex_unlock(_domain_mutex); + goto out_free_irq_data; + } + } + + for (i = 0; i < nr_irqs; i++) { + irq_domain_insert_irq(virq + i); + } + mutex_unlock(_domain_mutex); + + return virq; + +out_free_irq_data: + irq_domain_free_irq_data(virq, nr_irqs); + return ret; +} + /** * __irq_domain_alloc_irqs - Allocate IRQs from domain * @domain:domain to allocate from @@ -1412,7 +1457,7 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, unsigned int nr_irqs, int node, void *arg, bool realloc, const struct irq_affinity_desc *affinity) { - int i, ret, virq; + int ret, virq; if
Re: [PATCH v3 0/6] ibmvfc: Protocol definition updates and new targetWWPN Support
On Tue, 17 Nov 2020 19:10:58 -0600, Tyrel Datwyler wrote: > Several Management Datagrams (MADs) have been reversioned to add a targetWWPN > field that is intended to better identify a target over in place of the > scsi_id. > This patchset adds the new protocol definitions and implements support for > using > the new targetWWPN field and exposing the capability to the VIOS. This > targetWWPN support is a prerequisite for upcoming channelization/MQ support. > > changes in v3: > * addressed field naming consistency in Patches 2 & 5 in response to [brking] > * fixed commit log typos > * fixed bad rebase of Patch 4 such that it now compiles > > [...] Applied to 5.11/scsi-queue, thanks! [1/6] scsi: ibmvfc: Deduplicate common ibmvfc_cmd init code https://git.kernel.org/mkp/scsi/c/fad74a1be2db [2/6] scsi: ibmvfc: Add new fields for version 2 of several MADs https://git.kernel.org/mkp/scsi/c/c16b8a6d8af1 [3/6] scsi: ibmvfc: Add helper for testing capability flags https://git.kernel.org/mkp/scsi/c/a318c2b71cce [4/6] scsi: ibmvfc: Add FC payload retrieval routines for versioned vfcFrames https://git.kernel.org/mkp/scsi/c/5a9d16f71c26 [5/6] scsi: ibmvfc: Add support for target_wwpn field in v2 MADs and vfcFrame https://git.kernel.org/mkp/scsi/c/ebc7c74bd2dc [6/6] scsi: ibmvfc: Advertise client support for targetWWPN using v2 commands https://git.kernel.org/mkp/scsi/c/e4af87b7079e -- Martin K. Petersen Oracle Linux Engineering
Re: [PATCH 1/3] perf/core: Flush PMU internal buffers for per-CPU events
On 11/24/20 10:21 AM, Namhyung Kim wrote: Hello, On Mon, Nov 23, 2020 at 8:00 PM Michael Ellerman wrote: Namhyung Kim writes: Hi Peter and Kan, (Adding PPC folks) On Tue, Nov 17, 2020 at 2:01 PM Namhyung Kim wrote: Hello, On Thu, Nov 12, 2020 at 4:54 AM Liang, Kan wrote: On 11/11/2020 11:25 AM, Peter Zijlstra wrote: On Mon, Nov 09, 2020 at 09:49:31AM -0500, Liang, Kan wrote: - When the large PEBS was introduced (9c964efa4330), the sched_task() should be invoked to flush the PEBS buffer in each context switch. However, The perf_sched_events in account_event() is not updated accordingly. The perf_event_task_sched_* never be invoked for a pure per-CPU context. Only per-task event works. At that time, the perf_pmu_sched_task() is outside of perf_event_context_sched_in/out. It means that perf has to double perf_pmu_disable() for per-task event. - The patch 1 tries to fix broken per-CPU events. The CPU context cannot be retrieved from the task->perf_event_ctxp. So it has to be tracked in the sched_cb_list. Yes, the code is very similar to the original codes, but it is actually the new code for per-CPU events. The optimization for per-task events is still kept. For the case, which has both a CPU context and a task context, yes, the __perf_pmu_sched_task() in this patch is not invoked. Because the sched_task() only need to be invoked once in a context switch. The sched_task() will be eventually invoked in the task context. The thing is; your first two patches rely on PERF_ATTACH_SCHED_CB and only set that for large pebs. Are you sure the other users (Intel LBR and PowerPC BHRB) don't need it? I didn't set it for LBR, because the perf_sched_events is always enabled for LBR. But, yes, we should explicitly set the PERF_ATTACH_SCHED_CB for LBR. 
if (has_branch_stack(event)) inc = true; If they indeed do not require the pmu::sched_task() callback for CPU events, then I still think the whole perf_sched_cb_{inc,dec}() interface No, LBR requires the pmu::sched_task() callback for CPU events. Now, The LBR registers have to be reset in sched in even for CPU events. To fix the shorter LBR callstack issue for CPU events, we also need to save/restore LBRs in pmu::sched_task(). https://lore.kernel.org/lkml/1578495789-95006-4-git-send-email-kan.li...@linux.intel.com/ is confusing at best. Can't we do something like this instead? I think the below patch may have two issues. - PERF_ATTACH_SCHED_CB is required for LBR (maybe PowerPC BHRB as well) now. - We may disable the large PEBS later if not all PEBS events support large PEBS. The PMU need a way to notify the generic code to decrease the nr_sched_task. Any updates on this? I've reviewed and tested Kan's patches and they all look good. Maybe we can talk to PPC folks to confirm the BHRB case? Can we move this forward? I saw patch 3/3 also adds PERF_ATTACH_SCHED_CB for PowerPC too. But it'd be nice if ppc folks can confirm the change. Sorry I've read the whole thread, but I'm still not entirely sure I understand the question. Thanks for your time and sorry about not being clear enough. We found per-cpu events are not calling pmu::sched_task() on context switches. So PERF_ATTACH_SCHED_CB was added to indicate the core logic that it needs to invoke the callback. The patch 3/3 added the flag to PPC (for BHRB) with other changes (I think it should be split like in the patch 2/3) and want to get ACKs from the PPC folks. Sorry for delay. I guess first it will be better to split the ppc change to a separate patch, secondly, we are missing the changes needed in the power_pmu_bhrb_disable() where perf_sched_cb_dec() needs the "state" to be included. Maddy Thanks, Namhyung
Re: [PATCH 1/3] perf/core: Flush PMU internal buffers for per-CPU events
Hello, On Mon, Nov 23, 2020 at 8:00 PM Michael Ellerman wrote: > > Namhyung Kim writes: > > Hi Peter and Kan, > > > > (Adding PPC folks) > > > > On Tue, Nov 17, 2020 at 2:01 PM Namhyung Kim wrote: > >> > >> Hello, > >> > >> On Thu, Nov 12, 2020 at 4:54 AM Liang, Kan > >> wrote: > >> > > >> > > >> > > >> > On 11/11/2020 11:25 AM, Peter Zijlstra wrote: > >> > > On Mon, Nov 09, 2020 at 09:49:31AM -0500, Liang, Kan wrote: > >> > > > >> > >> - When the large PEBS was introduced (9c964efa4330), the sched_task() > >> > >> should > >> > >> be invoked to flush the PEBS buffer in each context switch. However, > >> > >> The > >> > >> perf_sched_events in account_event() is not updated accordingly. The > >> > >> perf_event_task_sched_* never be invoked for a pure per-CPU context. > >> > >> Only > >> > >> per-task event works. > >> > >> At that time, the perf_pmu_sched_task() is outside of > >> > >> perf_event_context_sched_in/out. It means that perf has to double > >> > >> perf_pmu_disable() for per-task event. > >> > > > >> > >> - The patch 1 tries to fix broken per-CPU events. The CPU context > >> > >> cannot be > >> > >> retrieved from the task->perf_event_ctxp. So it has to be tracked in > >> > >> the > >> > >> sched_cb_list. Yes, the code is very similar to the original codes, > >> > >> but it > >> > >> is actually the new code for per-CPU events. The optimization for > >> > >> per-task > >> > >> events is still kept. > >> > >>For the case, which has both a CPU context and a task context, > >> > >> yes, the > >> > >> __perf_pmu_sched_task() in this patch is not invoked. Because the > >> > >> sched_task() only need to be invoked once in a context switch. The > >> > >> sched_task() will be eventually invoked in the task context. > >> > > > >> > > The thing is; your first two patches rely on PERF_ATTACH_SCHED_CB and > >> > > only set that for large pebs. Are you sure the other users (Intel LBR > >> > > and PowerPC BHRB) don't need it? 
> >> > > >> > I didn't set it for LBR, because the perf_sched_events is always enabled > >> > for LBR. But, yes, we should explicitly set the PERF_ATTACH_SCHED_CB > >> > for LBR. > >> > > >> > if (has_branch_stack(event)) > >> > inc = true; > >> > > >> > > > >> > > If they indeed do not require the pmu::sched_task() callback for CPU > >> > > events, then I still think the whole perf_sched_cb_{inc,dec}() > >> > > interface > >> > > >> > No, LBR requires the pmu::sched_task() callback for CPU events. > >> > > >> > Now, The LBR registers have to be reset in sched in even for CPU events. > >> > > >> > To fix the shorter LBR callstack issue for CPU events, we also need to > >> > save/restore LBRs in pmu::sched_task(). > >> > https://lore.kernel.org/lkml/1578495789-95006-4-git-send-email-kan.li...@linux.intel.com/ > >> > > >> > > is confusing at best. > >> > > > >> > > Can't we do something like this instead? > >> > > > >> > I think the below patch may have two issues. > >> > - PERF_ATTACH_SCHED_CB is required for LBR (maybe PowerPC BHRB as well) > >> > now. > >> > - We may disable the large PEBS later if not all PEBS events support > >> > large PEBS. The PMU need a way to notify the generic code to decrease > >> > the nr_sched_task. > >> > >> Any updates on this? I've reviewed and tested Kan's patches > >> and they all look good. > >> > >> Maybe we can talk to PPC folks to confirm the BHRB case? > > > > Can we move this forward? I saw patch 3/3 also adds PERF_ATTACH_SCHED_CB > > for PowerPC too. But it'd be nice if ppc folks can confirm the change. > > Sorry I've read the whole thread, but I'm still not entirely sure I > understand the question. Thanks for your time and sorry about not being clear enough. We found per-cpu events are not calling pmu::sched_task() on context switches. So PERF_ATTACH_SCHED_CB was added to indicate the core logic that it needs to invoke the callback. 
The patch 3/3 added the flag to PPC (for BHRB) with other changes (I think it should be split like in the patch 2/3) and want to get ACKs from the PPC folks. Thanks, Namhyung
Re: [PATCH 1/3] ibmvfc: byte swap login_buf.resp values in attribute show functions
On Tue, 17 Nov 2020 12:50:29 -0600, Tyrel Datwyler wrote: > Both ibmvfc_show_host_(capabilities|npiv_version) functions retrieve > values from vhost->login_buf.resp buffer. This is the MAD response > buffer from the VIOS and as such any multi-byte non-string values are in > big endian format. > > Byte swap these values to host cpu endian format for better human > readability. Applied to 5.11/scsi-queue, thanks! [1/3] scsi: ibmvfc: Byte swap login_buf.resp values in attribute show functions https://git.kernel.org/mkp/scsi/c/61bdb4eec8d1 [2/3] scsi: ibmvfc: Remove trailing semicolon https://git.kernel.org/mkp/scsi/c/4e0716199ab6 [3/3] scsi: ibmvfc: Use correlation token to tag commands https://git.kernel.org/mkp/scsi/c/2aa0102c6688 -- Martin K. Petersen Oracle Linux Engineering
Re: [PATCH v3 3/3] powerpc/64s: feature: Work around inline asm issues
Bill Wendling writes: > On Mon, Nov 23, 2020 at 12:10 PM Segher Boessenkool > wrote: >> On Mon, Nov 23, 2020 at 12:01:01PM -0800, Bill Wendling wrote: >> > On Mon, Nov 23, 2020 at 11:58 AM Segher Boessenkool >> > wrote: >> > > > On Sun, Nov 22, 2020 at 10:36 PM Segher Boessenkool >> > > > wrote: >> > > > > "true" (as a result of a comparison) in as is -1, not 1. >> > > >> > > On Mon, Nov 23, 2020 at 11:43:11AM -0800, Bill Wendling wrote: >> > > > What Segher said. :-) Also, if you reverse the comparison, you'll get >> > > > a build error. >> > > >> > > But that means your patch is the wrong way around? >> > > >> > > - .ifgt (label##4b- label##3b)-(label##2b- label##1b);\ >> > > - .error "Feature section else case larger than body";\ >> > > - .endif; \ >> > > + .org . - ((label##4b-label##3b) > (label##2b-label##1b)); \ >> > > >> > > It should be a + in that last line, not a -. >> > >> > I said so in a follow up email. >> >> Yeah, and that arrived a second after I pressed "send" :-) >> > Michael, I apologize for the churn with these patches. I believe the > policy is to resend the match as "v4", correct? > > I ran tests with the change above. It compiled with no error. If I > switch the labels around to ".org . + ((label##2b-label##1b) > > (label##4b-label##3b))", then it fails as expected. 
I wanted to retain the nicer error reporting for gcc builds, so I did it like this: diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index b0af97add751..c4ad33074df5 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -36,6 +36,24 @@ label##2:\ .align 2; \ label##3: + +#ifndef CONFIG_CC_IS_CLANG +#define CHECK_ALT_SIZE(else_size, body_size) \ + .ifgt (else_size) - (body_size);\ + .error "Feature section else case larger than body";\ + .endif; +#else +/* + * If we use the ifgt syntax above, clang's assembler complains about the + * expression being non-absolute when the code appears in an inline assembly + * statement. + * As a workaround use an .org directive that has no effect if the else case + * instructions are smaller than the body, but fails otherwise. + */ +#define CHECK_ALT_SIZE(else_size, body_size) \ + .org . + ((else_size) > (body_size)); +#endif + #define MAKE_FTR_SECTION_ENTRY(msk, val, label, sect) \ label##4: \ .popsection;\ @@ -48,9 +66,7 @@ label##5: \ FTR_ENTRY_OFFSET label##2b-label##5b; \ FTR_ENTRY_OFFSET label##3b-label##5b; \ FTR_ENTRY_OFFSET label##4b-label##5b; \ - .ifgt (label##4b- label##3b)-(label##2b- label##1b);\ - .error "Feature section else case larger than body";\ - .endif; \ + CHECK_ALT_SIZE((label##4b-label##3b), (label##2b-label##1b)); \ .popsection; I've pushed a branch with all your patches applied to: https://github.com/linuxppc/linux/commits/next-test Are you able to give that a quick test? It builds clean with clang for me, but we must be using different versions of clang because my branch already builds clean for me even without your patches. cheers
[PATCH V2] powerpc/perf: Fix crash with is_sier_available when pmu is not set
On systems without any specific PMU driver support registered, running 'perf record' with —intr-regs will crash ( perf record -I ). The relevant portion from crash logs and Call Trace: Unable to handle kernel paging request for data at address 0x0068 Faulting instruction address: 0xc013eb18 Oops: Kernel access of bad area, sig: 11 [#1] CPU: 2 PID: 13435 Comm: kill Kdump: loaded Not tainted 4.18.0-193.el8.ppc64le #1 NIP: c013eb18 LR: c0139f2c CTR: c0393d80 REGS: c004a07ab4f0 TRAP: 0300 Not tainted (4.18.0-193.el8.ppc64le) NIP [c013eb18] is_sier_available+0x18/0x30 LR [c0139f2c] perf_reg_value+0x6c/0xb0 Call Trace: [c004a07ab770] [c004a07ab7c8] 0xc004a07ab7c8 (unreliable) [c004a07ab7a0] [c03aa77c] perf_output_sample+0x60c/0xac0 [c004a07ab840] [c03ab3f0] perf_event_output_forward+0x70/0xb0 [c004a07ab8c0] [c039e208] __perf_event_overflow+0x88/0x1a0 [c004a07ab910] [c039e42c] perf_swevent_hrtimer+0x10c/0x1d0 [c004a07abc50] [c0228b9c] __hrtimer_run_queues+0x17c/0x480 [c004a07abcf0] [c022aaf4] hrtimer_interrupt+0x144/0x520 [c004a07abdd0] [c002a864] timer_interrupt+0x104/0x2f0 [c004a07abe30] [c00091c4] decrementer_common+0x114/0x120 When perf record session is started with "-I" option, capturing registers on each sample calls is_sier_available() to check for the SIER (Sample Instruction Event Register) availability in the platform. This function in core-book3s accesses 'ppmu->flags'. If a platform specific PMU driver is not registered, ppmu is set to NULL and accessing its members results in a crash. Fix the crash by returning false in is_sier_available() if ppmu is not set. Fixes: 333804dc3b7a ("powerpc/perf: Update perf_regs structure to include SIER") Reported-by: Sachin Sant Signed-off-by: Athira Rajeev --- Changes in v2: - Corrected the commit message as suggested by Michael Ellerman. 
arch/powerpc/perf/core-book3s.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 08643cb..1de4770 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -137,6 +137,9 @@ static void pmao_restore_workaround(bool ebb) { } bool is_sier_available(void) { + if (!ppmu) + return false; + if (ppmu->flags & PPMU_HAS_SIER) return true; -- 1.8.3.1
[powerpc:fixes-test] BUILD SUCCESS b6b79dd53082db11070b4368d85dd6699ff0b063
haps_hs_smp_defconfig mips decstation_defconfig m68k allmodconfig armxcep_defconfig powerpc kilauea_defconfig powerpc mpc8540_ads_defconfig armmagician_defconfig mips lemote2f_defconfig arc defconfig sh urquell_defconfig sh sdk7780_defconfig alpha defconfig sh ecovec24_defconfig powerpc arches_defconfig powerpc currituck_defconfig arm pxa168_defconfig arm omap2plus_defconfig sh rsk7269_defconfig powerpc katmai_defconfig arm exynos_defconfig powerpc pq2fads_defconfig powerpcsocrates_defconfig mips db1xxx_defconfig arm corgi_defconfig powerpc ppc64_defconfig powerpc taishan_defconfig mips ci20_defconfig arc nsimosci_hs_defconfig xtensa audio_kc705_defconfig mips rbtx49xx_defconfig armmps2_defconfig c6xevmc6474_defconfig armspear3xx_defconfig xtensageneric_kc705_defconfig mips rb532_defconfig powerpc canyonlands_defconfig ia64 allmodconfig ia64 allyesconfig m68kdefconfig m68k allyesconfig nios2 defconfig arc allyesconfig c6x allyesconfig nds32 defconfig nios2allyesconfig cskydefconfig alphaallyesconfig xtensa allyesconfig h8300allyesconfig sh allmodconfig parisc defconfig s390 allyesconfig parisc allyesconfig s390defconfig i386 allyesconfig sparcallyesconfig sparc defconfig i386defconfig mips allmodconfig powerpc allyesconfig powerpc allmodconfig i386 randconfig-a004-20201123 i386 randconfig-a003-20201123 i386 randconfig-a002-20201123 i386 randconfig-a005-20201123 i386 randconfig-a001-20201123 i386 randconfig-a006-20201123 x86_64 randconfig-a015-20201123 x86_64 randconfig-a011-20201123 x86_64 randconfig-a014-20201123 x86_64 randconfig-a016-20201123 x86_64 randconfig-a012-20201123 x86_64 randconfig-a013-20201123 i386 randconfig-a012-20201123 i386 randconfig-a013-20201123 i386 randconfig-a011-20201123 i386 randconfig-a016-20201123 i386 randconfig-a014-20201123 i386 randconfig-a015-20201123 riscvnommu_k210_defconfig riscvallyesconfig riscvnommu_virt_defconfig riscv allnoconfig riscv defconfig riscv rv32_defconfig riscvallmodconfig x86_64 rhel x86_64 allyesconfig 
x86_64rhel-7.6-kselftests x86_64 defconfig x86_64 rhel-8.3 x86_64 kexec clang tested configs: x86_64 randconfig-a006-20201123 x86_64 randconfig-a003-20201123 x86_64 randconfig-a004-20201123 x86_64 randconfig-a005-20201123 x86_64 randconfig-a002-20201123 x86_64 randconfig-a001-20201123 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org
[powerpc:next-test] BUILD SUCCESS c250581fcf84c34cbaf5b535512b60a5e96970f6
rv32_defconfig mips decstation_defconfig armmvebu_v5_defconfig powerpc pcm030_defconfig armmagician_defconfig umkunit_defconfig powerpc ppa8548_defconfig mips lemote2f_defconfig mipsar7_defconfig sparc sparc64_defconfig arc defconfig sh urquell_defconfig sh ecovec24_defconfig powerpc pq2fads_defconfig powerpcsocrates_defconfig arm corgi_defconfig powerpc ppc64_defconfig powerpc taishan_defconfig mips ci20_defconfig arc nsimosci_hs_defconfig xtensa audio_kc705_defconfig mips rbtx49xx_defconfig armmps2_defconfig c6xevmc6474_defconfig armspear3xx_defconfig mips rb532_defconfig ia64 allmodconfig ia64defconfig ia64 allyesconfig m68k allmodconfig m68kdefconfig m68k allyesconfig nios2 defconfig arc allyesconfig c6x allyesconfig nds32 defconfig nios2allyesconfig cskydefconfig alphaallyesconfig xtensa allyesconfig h8300allyesconfig sh allmodconfig parisc defconfig s390 allyesconfig parisc allyesconfig i386 allyesconfig sparcallyesconfig sparc defconfig i386defconfig mips allmodconfig powerpc allyesconfig powerpc allmodconfig x86_64 randconfig-a006-20201120 x86_64 randconfig-a003-20201120 x86_64 randconfig-a004-20201120 x86_64 randconfig-a005-20201120 x86_64 randconfig-a001-20201120 x86_64 randconfig-a002-20201120 i386 randconfig-a004-20201123 i386 randconfig-a003-20201123 i386 randconfig-a002-20201123 i386 randconfig-a005-20201123 i386 randconfig-a001-20201123 i386 randconfig-a006-20201123 i386 randconfig-a004-20201120 i386 randconfig-a003-20201120 i386 randconfig-a002-20201120 i386 randconfig-a005-20201120 i386 randconfig-a001-20201120 i386 randconfig-a006-20201120 x86_64 randconfig-a015-20201123 x86_64 randconfig-a011-20201123 x86_64 randconfig-a014-20201123 x86_64 randconfig-a016-20201123 x86_64 randconfig-a012-20201123 x86_64 randconfig-a013-20201123 i386 randconfig-a012-20201120 i386 randconfig-a013-20201120 i386 randconfig-a011-20201120 i386 randconfig-a016-20201120 i386 randconfig-a014-20201120 i386 randconfig-a015-20201120 i386 randconfig-a012-20201123 i386 
randconfig-a013-20201123 i386 randconfig-a011-20201123 i386 randconfig-a016-20201123 i386 randconfig-a014-20201123 i386 randconfig-a015-20201123 riscvnommu_k210_defconfig riscvallyesconfig riscvnommu_virt_defconfig riscv allnoconfig riscv defconfig riscvallmodconfig x86_64 rhel x86_64 allyesconfig x86_64rhel-7.6-kselftests x86_64 defconfig x86_64 rhel-8.3 x86_64 kexec clang tested configs: x86_64 randconfig-a006-20201123 x86_64 randconfig-a003-20201123 x86_64 randconfig-a004-20201123 x86_64 randconfig-a005-20201123 x86_64 randconfig-a002-20201123 x86_64 randconfig-a001-20201123 x86_64 randconfig-a015-20201120 x86_64 randconfig-a011-20201120 x86_64
[powerpc:merge] BUILD SUCCESS 7c94b5d4e9d328a69d43a11d7e3dfd7a6d762cb6
allyesconfig sparcallyesconfig sparc defconfig i386defconfig mips allmodconfig powerpc allyesconfig powerpc allmodconfig powerpc allnoconfig i386 randconfig-a004-20201123 i386 randconfig-a003-20201123 i386 randconfig-a002-20201123 i386 randconfig-a005-20201123 i386 randconfig-a001-20201123 i386 randconfig-a006-20201123 x86_64 randconfig-a015-20201123 x86_64 randconfig-a011-20201123 x86_64 randconfig-a014-20201123 x86_64 randconfig-a016-20201123 x86_64 randconfig-a012-20201123 x86_64 randconfig-a013-20201123 i386 randconfig-a012-20201123 i386 randconfig-a013-20201123 i386 randconfig-a011-20201123 i386 randconfig-a016-20201123 i386 randconfig-a014-20201123 i386 randconfig-a015-20201123 riscvnommu_k210_defconfig riscvallyesconfig riscvnommu_virt_defconfig riscv allnoconfig riscv defconfig riscv rv32_defconfig riscvallmodconfig x86_64 rhel x86_64 allyesconfig x86_64rhel-7.6-kselftests x86_64 defconfig x86_64 rhel-8.3 x86_64 kexec clang tested configs: x86_64 randconfig-a006-20201123 x86_64 randconfig-a003-20201123 x86_64 randconfig-a004-20201123 x86_64 randconfig-a005-20201123 x86_64 randconfig-a002-20201123 x86_64 randconfig-a001-20201123 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org
Re: [PATCH v3] soc: fsl: dpio: Get the cpumask through cpumask_of(cpu)
On Mon, Oct 19, 2020 at 9:15 PM Yi Wang wrote: > > From: Hao Si > > The local variable 'cpumask_t mask' is in the stack memory, and its address > is assigned to 'desc->affinity' in 'irq_set_affinity_hint()'. > But the memory area where this variable is located is at risk of being > modified. > > During LTP testing, the following error was generated: > > Unable to handle kernel paging request at virtual address 12e9b790 > Mem abort info: > ESR = 0x9607 > Exception class = DABT (current EL), IL = 32 bits > SET = 0, FnV = 0 > EA = 0, S1PTW = 0 > Data abort info: > ISV = 0, ISS = 0x0007 > CM = 0, WnR = 0 > swapper pgtable: 4k pages, 48-bit VAs, pgdp = 75ac5e07 > [12e9b790] pgd=0027dbffe003, pud=0027dbffd003, > pmd=0027b6d61003, pte= > Internal error: Oops: 9607 [#1] PREEMPT SMP > Modules linked in: xt_conntrack > Process read_all (pid: 20171, stack limit = 0x44ea4095) > CPU: 14 PID: 20171 Comm: read_all Tainted: GB W > Hardware name: NXP Layerscape LX2160ARDB (DT) > pstate: 8085 (Nzcv daIf -PAN -UAO) > pc : irq_affinity_hint_proc_show+0x54/0xb0 > lr : irq_affinity_hint_proc_show+0x4c/0xb0 > sp : 1138bc10 > x29: 1138bc10 x28: d131d1e0 > x27: 007000c0 x26: 8025b9480dc0 > x25: 8025b9480da8 x24: 03ff > x23: 8027334f8300 x22: 80272e97d000 > x21: 80272e97d0b0 x20: 8025b9480d80 > x19: 09a49000 x18: > x17: x16: > x15: x14: > x13: x12: 0040 > x11: x10: 802735b79b88 > x9 : x8 : > x7 : 09a49848 x6 : 0003 > x5 : x4 : 08157d6c > x3 : 1138bc10 x2 : 12e9b790 > x1 : x0 : > Call trace: > irq_affinity_hint_proc_show+0x54/0xb0 > seq_read+0x1b0/0x440 > proc_reg_read+0x80/0xd8 > __vfs_read+0x60/0x178 > vfs_read+0x94/0x150 > ksys_read+0x74/0xf0 > __arm64_sys_read+0x24/0x30 > el0_svc_common.constprop.0+0xd8/0x1a0 > el0_svc_handler+0x34/0x88 > el0_svc+0x10/0x14 > Code: f9001bbf 943e0732 f94066c2 b462 (f9400041) > ---[ end trace b495bdcb0b3b732b ]--- > Kernel panic - not syncing: Fatal exception > SMP: stopping secondary CPUs > SMP: failed to stop secondary CPUs 0,2-4,6,8,11,13-15 > Kernel 
Offset: disabled > CPU features: 0x0,21006008 > Memory Limit: none > ---[ end Kernel panic - not syncing: Fatal exception ]--- > > Fix it by using 'cpumask_of(cpu)' to get the cpumask. > > Signed-off-by: Hao Si > Signed-off-by: Lin Chen > Signed-off-by: Yi Wang Applied for fix. Thanks. > --- > v3: Use cpumask_of(cpu) to get the pre-defined cpumask in the static > cpu_bit_bitmap array. > v2: Place 'cpumask_t mask' in the driver's private data and while at it, > rename it to cpu_mask. > > drivers/soc/fsl/dpio/dpio-driver.c | 5 + > 1 file changed, 1 insertion(+), 4 deletions(-) > > diff --git a/drivers/soc/fsl/dpio/dpio-driver.c > b/drivers/soc/fsl/dpio/dpio-driver.c > index 7b642c3..7f397b4 100644 > --- a/drivers/soc/fsl/dpio/dpio-driver.c > +++ b/drivers/soc/fsl/dpio/dpio-driver.c > @@ -95,7 +95,6 @@ static int register_dpio_irq_handlers(struct fsl_mc_device > *dpio_dev, int cpu) > { > int error; > struct fsl_mc_device_irq *irq; > - cpumask_t mask; > > irq = dpio_dev->irqs[0]; > error = devm_request_irq(_dev->dev, > @@ -112,9 +111,7 @@ static int register_dpio_irq_handlers(struct > fsl_mc_device *dpio_dev, int cpu) > } > > /* set the affinity hint */ > - cpumask_clear(); > - cpumask_set_cpu(cpu, ); > - if (irq_set_affinity_hint(irq->msi_desc->irq, )) > + if (irq_set_affinity_hint(irq->msi_desc->irq, cpumask_of(cpu))) > dev_err(_dev->dev, > "irq_set_affinity failed irq %d cpu %d\n", > irq->msi_desc->irq, cpu); > -- > 2.15.2
Re: [PATCH v3 2/2] powerpc/ptrace: Hard wire PT_SOFTE value to 1 in gpr_get() too
Oleg Nesterov writes: > Christophe, et al, > > So what? > > Are you going to push your change or should I re-send 1-2 without > whitespace cleanups? I'll take your 1 & 2 and fixup the whitespace issues when applying. cheers > On 11/19, Oleg Nesterov wrote: >> >> On 11/19, Christophe Leroy wrote: >> > >> > I think the following should work, and not require the first patch (compile >> > tested only). >> > >> > --- a/arch/powerpc/kernel/ptrace/ptrace-view.c >> > +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c >> > @@ -234,9 +234,21 @@ static int gpr_get(struct task_struct *target, const >> > struct user_regset *regset, >> >BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != >> > offsetof(struct pt_regs, msr) + sizeof(long)); >> > >> > +#ifdef CONFIG_PPC64 >> > + membuf_write(, >thread.regs->orig_gpr3, >> > + offsetof(struct pt_regs, softe) - offsetof(struct pt_regs, >> > orig_gpr3)); >> > + membuf_store(, 1UL); >> > + >> > + BUILD_BUG_ON(offsetof(struct pt_regs, trap) != >> > + offsetof(struct pt_regs, softe) + sizeof(long)); >> > + >> > + membuf_write(, >thread.regs->trap, >> > + sizeof(struct user_pt_regs) - offsetof(struct pt_regs, >> > trap)); >> > +#else >> >membuf_write(, >thread.regs->orig_gpr3, >> >sizeof(struct user_pt_regs) - >> >offsetof(struct pt_regs, orig_gpr3)); >> > +#endif >> >return membuf_zero(, ELF_NGREG * sizeof(unsigned long) - >> > sizeof(struct user_pt_regs)); >> > } >> >> Probably yes. >> >> This mirrors the previous patch I sent >> (https://lore.kernel.org/lkml/20190917143753.ga12...@redhat.com/) >> and this is exactly what I tried to avoid, we can make a simpler fix now. >> >> But let me repeat, I agree with any fix even if imp my version simplifies >> the code, just >> commit this change and lets forget this problem. >> >> Oleg.
Re: [PATCH 25/25] soc: fsl: qbman: qman: Remove unused variable 'dequeue_wq'
Hi Roy, On Tue, Nov 3, 2020 at 9:31 AM Lee Jones wrote: > > Fixes the following W=1 kernel build warning(s): > > drivers/soc/fsl/qbman/qman.c: In function ‘qman_shutdown_fq’: > drivers/soc/fsl/qbman/qman.c:2700:8: warning: variable ‘dequeue_wq’ set but > not used [-Wunused-but-set-variable] > > Cc: Li Yang > Cc: YueHaibing > Cc: linuxppc-dev@lists.ozlabs.org > Signed-off-by: Lee Jones > --- > drivers/soc/fsl/qbman/qman.c | 8 ++-- > 1 file changed, 2 insertions(+), 6 deletions(-) > > diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c > index 9888a70618730..62b182c3a8b04 100644 > --- a/drivers/soc/fsl/qbman/qman.c > +++ b/drivers/soc/fsl/qbman/qman.c > @@ -2622,7 +2622,7 @@ int qman_shutdown_fq(u32 fqid) > union qm_mc_command *mcc; > union qm_mc_result *mcr; > int orl_empty, drain = 0, ret = 0; > - u32 channel, wq, res; > + u32 channel, res; > u8 state; > > p = get_affine_portal(); > @@ -2655,7 +2655,7 @@ int qman_shutdown_fq(u32 fqid) > DPAA_ASSERT((mcr->verb & QM_MCR_VERB_MASK) == QM_MCR_VERB_QUERYFQ); > /* Need to store these since the MCR gets reused */ > channel = qm_fqd_get_chan(>queryfq.fqd); > - wq = qm_fqd_get_wq(>queryfq.fqd); > + qm_fqd_get_wq(>queryfq.fqd); This probably is not needed also. > > if (channel < qm_channel_pool1) { > channel_portal = get_portal_for_channel(channel); > @@ -2697,7 +2697,6 @@ int qman_shutdown_fq(u32 fqid) > * to dequeue from the channel the FQ is scheduled on > */ > int found_fqrn = 0; > - u16 dequeue_wq = 0; > > /* Flag that we need to drain FQ */ > drain = 1; > @@ -2705,11 +2704,8 @@ int qman_shutdown_fq(u32 fqid) > if (channel >= qm_channel_pool1 && > channel < qm_channel_pool1 + 15) { > /* Pool channel, enable the bit in the portal > */ > - dequeue_wq = (channel - > - qm_channel_pool1 + 1)<<4 | wq; > } else if (channel < qm_channel_pool1) { > /* Dedicated channel */ > - dequeue_wq = wq; With these gone, these if statements seem to be redundant. 
Can you propose an additional patch to further clean up the code here? Thanks. > } else { > dev_err(dev, "Can't recover FQ 0x%x, ch: > 0x%x", > fqid, channel); > -- > 2.25.1 >
Re: [PATCH 00/25] Rid W=1 warnings in SoC
On Tue, Nov 3, 2020 at 9:29 AM Lee Jones wrote: > > This set is part of a larger effort attempting to clean-up W=1 > kernel builds, which are currently overwhelmingly riddled with > niggly little warnings. > > Lee Jones (25): > soc: fsl: dpio: qbman-portal: Fix a bunch of kernel-doc misdemeanours > soc: fsl: qe: qe_common: Fix misnamed function attribute 'addr' > soc: fsl: qbman: qman: Remove unused variable 'dequeue_wq' The above are applied for next. Thanks. Regards, Leo > > drivers/soc/bcm/brcmstb/pm/pm-arm.c | 2 + > drivers/soc/fsl/dpio/qbman-portal.c | 18 +-- > drivers/soc/fsl/qbman/qman.c | 8 +-- > drivers/soc/fsl/qe/qe_common.c | 2 +- > drivers/soc/qcom/kryo-l2-accessors.c | 2 +- > drivers/soc/qcom/llcc-qcom.c | 2 +- > drivers/soc/qcom/qcom-geni-se.c | 5 +- > drivers/soc/qcom/qcom_aoss.c | 4 +- > drivers/soc/qcom/rpmh.c | 2 +- > drivers/soc/qcom/rpmhpd.c| 3 ++ > drivers/soc/qcom/smem.c | 3 +- > drivers/soc/qcom/smp2p.c | 3 +- > drivers/soc/qcom/smsm.c | 4 +- > drivers/soc/qcom/wcnss_ctrl.c| 8 +-- > drivers/soc/rockchip/io-domain.c | 3 -- > drivers/soc/samsung/s3c-pm-check.c | 2 +- > drivers/soc/tegra/fuse/speedo-tegra124.c | 7 ++- > drivers/soc/tegra/fuse/speedo-tegra210.c | 8 +-- > drivers/soc/ti/k3-ringacc.c | 1 + > drivers/soc/ti/knav_dma.c| 2 +- > drivers/soc/ti/knav_qmss_queue.c | 62 > drivers/soc/ti/pm33xx.c | 4 +- > drivers/soc/ti/wkup_m3_ipc.c | 8 ++- > 23 files changed, 86 insertions(+), 77 deletions(-) > > Cc: act > Cc: Andy Gross > Cc: bcm-kernel-feedback-l...@broadcom.com > Cc: Ben Dooks > Cc: Bjorn Andersson > Cc: Cyril Chemparathy > Cc: Dan Malek > Cc: Dave Gerlach > Cc: Doug Anderson > Cc: Florian Fainelli > Cc: Heiko Stuebner > Cc: Jonathan Hunter > Cc: Krzysztof Kozlowski > Cc: Liam Girdwood > Cc: linux-arm-...@vger.kernel.org > Cc: linuxppc-dev@lists.ozlabs.org > Cc: linux-rockc...@lists.infradead.org > Cc: linux-samsung-...@vger.kernel.org > Cc: linux-te...@vger.kernel.org > Cc: Li Yang > Cc: Mark Brown > Cc: Qiang Zhao > Cc: "Rafael J. 
Wysocki" > Cc: Roy Pledge > Cc: Sandeep Nair > Cc: Santosh Shilimkar > Cc: Scott Wood > Cc: "Software, Inc" > Cc: Thierry Reding > Cc: Vitaly Bordug > Cc: YueHaibing > > -- > 2.25.1 >
Re: linux-next: build failure in Linus' tree
Thanks sfr and mpe. > Applied to powerpc/fixes. > > [1/1] powerpc/64s: Fix allnoconfig build since uaccess flush > > https://git.kernel.org/powerpc/c/b6b79dd53082db11070b4368d85dd6699ff0b063 We also needed a similar fix for stable, which has also been applied. I guess I should build some sort of build process that tests a whole range of configs. I did test a few but clearly not enough. Is there a known list that I should be using? Something from kisskb? Kind regards, Daniel Michael Ellerman writes: > On Mon, 23 Nov 2020 18:40:16 +1100, Stephen Rothwell wrote: >> After merging most of the trees, today's linux-next build (powerpc64 >> allnoconfig) failed like this: >> >> In file included from arch/powerpc/include/asm/kup.h:18, >> from arch/powerpc/include/asm/uaccess.h:9, >> from include/linux/uaccess.h:11, >> from include/linux/sched/task.h:11, >> from include/linux/sched/signal.h:9, >> from include/linux/rcuwait.h:6, >> from include/linux/percpu-rwsem.h:7, >> from include/linux/fs.h:33, >> from include/linux/compat.h:17, >> from arch/powerpc/kernel/asm-offsets.c:14: >> arch/powerpc/include/asm/book3s/64/kup-radix.h:66:1: warning: data >> definition has no type or storage class >>66 | DECLARE_STATIC_KEY_FALSE(uaccess_flush_key); >> | ^~~~ >> arch/powerpc/include/asm/book3s/64/kup-radix.h:66:1: error: type defaults to >> 'int' in declaration of 'DECLARE_STATIC_KEY_FALSE' [-Werror=implicit-int] >> arch/powerpc/include/asm/book3s/64/kup-radix.h:66:1: warning: parameter >> names (without types) in function declaration >> arch/powerpc/include/asm/book3s/64/kup-radix.h: In function >> 'prevent_user_access': >> arch/powerpc/include/asm/book3s/64/kup-radix.h:180:6: error: implicit >> declaration of function 'static_branch_unlikely' >> [-Werror=implicit-function-declaration] >> 180 | if (static_branch_unlikely(_flush_key)) >> | ^~ >> arch/powerpc/include/asm/book3s/64/kup-radix.h:180:30: error: >> 'uaccess_flush_key' undeclared (first use in this function) >> 180 | if 
(static_branch_unlikely(&uaccess_flush_key)) >> | ^ >> arch/powerpc/include/asm/book3s/64/kup-radix.h:180:30: note: each undeclared >> identifier is reported only once for each function it appears in >> arch/powerpc/include/asm/book3s/64/kup-radix.h: In function >> 'prevent_user_access_return': >> arch/powerpc/include/asm/book3s/64/kup-radix.h:189:30: error: >> 'uaccess_flush_key' undeclared (first use in this function) >> 189 | if (static_branch_unlikely(&uaccess_flush_key)) >> | ^ >> arch/powerpc/include/asm/book3s/64/kup-radix.h: In function >> 'restore_user_access': >> arch/powerpc/include/asm/book3s/64/kup-radix.h:198:30: error: >> 'uaccess_flush_key' undeclared (first use in this function) >> 198 | if (static_branch_unlikely(&uaccess_flush_key) && flags == >> AMR_KUAP_BLOCKED) >> | ^ >> >> [...] >
Re: linux-next: build failure in Linus' tree
On Mon, 23 Nov 2020 18:40:16 +1100, Stephen Rothwell wrote: > After merging most of the trees, today's linux-next build (powerpc64 > allnoconfig) failed like this: > > In file included from arch/powerpc/include/asm/kup.h:18, > from arch/powerpc/include/asm/uaccess.h:9, > from include/linux/uaccess.h:11, > from include/linux/sched/task.h:11, > from include/linux/sched/signal.h:9, > from include/linux/rcuwait.h:6, > from include/linux/percpu-rwsem.h:7, > from include/linux/fs.h:33, > from include/linux/compat.h:17, > from arch/powerpc/kernel/asm-offsets.c:14: > arch/powerpc/include/asm/book3s/64/kup-radix.h:66:1: warning: data definition > has no type or storage class >66 | DECLARE_STATIC_KEY_FALSE(uaccess_flush_key); > | ^~~~ > arch/powerpc/include/asm/book3s/64/kup-radix.h:66:1: error: type defaults to > 'int' in declaration of 'DECLARE_STATIC_KEY_FALSE' [-Werror=implicit-int] > arch/powerpc/include/asm/book3s/64/kup-radix.h:66:1: warning: parameter names > (without types) in function declaration > arch/powerpc/include/asm/book3s/64/kup-radix.h: In function > 'prevent_user_access': > arch/powerpc/include/asm/book3s/64/kup-radix.h:180:6: error: implicit > declaration of function 'static_branch_unlikely' > [-Werror=implicit-function-declaration] > 180 | if (static_branch_unlikely(_flush_key)) > | ^~ > arch/powerpc/include/asm/book3s/64/kup-radix.h:180:30: error: > 'uaccess_flush_key' undeclared (first use in this function) > 180 | if (static_branch_unlikely(_flush_key)) > | ^ > arch/powerpc/include/asm/book3s/64/kup-radix.h:180:30: note: each undeclared > identifier is reported only once for each function it appears in > arch/powerpc/include/asm/book3s/64/kup-radix.h: In function > 'prevent_user_access_return': > arch/powerpc/include/asm/book3s/64/kup-radix.h:189:30: error: > 'uaccess_flush_key' undeclared (first use in this function) > 189 | if (static_branch_unlikely(_flush_key)) > | ^ > arch/powerpc/include/asm/book3s/64/kup-radix.h: In function > 
'restore_user_access': > arch/powerpc/include/asm/book3s/64/kup-radix.h:198:30: error: > 'uaccess_flush_key' undeclared (first use in this function) > 198 | if (static_branch_unlikely(&uaccess_flush_key) && flags == > AMR_KUAP_BLOCKED) > | ^ > > [...] Applied to powerpc/fixes. [1/1] powerpc/64s: Fix allnoconfig build since uaccess flush https://git.kernel.org/powerpc/c/b6b79dd53082db11070b4368d85dd6699ff0b063 cheers
Re: [PATCH v3 3/3] powerpc/64s: feature: Work around inline asm issues
On Mon, Nov 23, 2020 at 12:01:01PM -0800, Bill Wendling wrote: > On Mon, Nov 23, 2020 at 11:58 AM Segher Boessenkool > wrote: > > > On Sun, Nov 22, 2020 at 10:36 PM Segher Boessenkool > > > wrote: > > > > "true" (as a result of a comparison) in as is -1, not 1. > > > > On Mon, Nov 23, 2020 at 11:43:11AM -0800, Bill Wendling wrote: > > > What Segher said. :-) Also, if you reverse the comparison, you'll get > > > a build error. > > > > But that means your patch is the wrong way around? > > > > - .ifgt (label##4b- label##3b)-(label##2b- label##1b);\ > > - .error "Feature section else case larger than body";\ > > - .endif; \ > > + .org . - ((label##4b-label##3b) > (label##2b-label##1b)); \ > > > > It should be a + in that last line, not a -. > > I said so in a follow up email. Yeah, and that arrived a second after I pressed "send" :-) > > Was this tested? > > > Please don't be insulting. Anyone can make an error. Absolutely, but it is just a question. It seems you could improve that testing! It helps you yourself most of all ;-) Segher
Re: [PATCH v3 3/3] powerpc/64s: feature: Work around inline asm issues
(Please don't top-post.) > On Sun, Nov 22, 2020 at 10:36 PM Segher Boessenkool > wrote: > > "true" (as a result of a comparison) in as is -1, not 1. On Mon, Nov 23, 2020 at 11:43:11AM -0800, Bill Wendling wrote: > What Segher said. :-) Also, if you reverse the comparison, you'll get > a build error. But that means your patch is the wrong way around? - .ifgt (label##4b- label##3b)-(label##2b- label##1b);\ - .error "Feature section else case larger than body";\ - .endif; \ + .org . - ((label##4b-label##3b) > (label##2b-label##1b)); \ It should be a + in that last line, not a -. Was this tested? Segher
Re: [PATCH v3 2/2] powerpc/ptrace: Hard wire PT_SOFTE value to 1 in gpr_get() too
Christophe, et al, So what? Are you going to push your change or should I re-send 1-2 without whitespace cleanups? On 11/19, Oleg Nesterov wrote: > > On 11/19, Christophe Leroy wrote: > > > > I think the following should work, and not require the first patch (compile > > tested only). > > > > --- a/arch/powerpc/kernel/ptrace/ptrace-view.c > > +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c > > @@ -234,9 +234,21 @@ static int gpr_get(struct task_struct *target, const > > struct user_regset *regset, > > BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != > > offsetof(struct pt_regs, msr) + sizeof(long)); > > > > +#ifdef CONFIG_PPC64 > > + membuf_write(, >thread.regs->orig_gpr3, > > +offsetof(struct pt_regs, softe) - offsetof(struct pt_regs, > > orig_gpr3)); > > + membuf_store(, 1UL); > > + > > + BUILD_BUG_ON(offsetof(struct pt_regs, trap) != > > +offsetof(struct pt_regs, softe) + sizeof(long)); > > + > > + membuf_write(, >thread.regs->trap, > > +sizeof(struct user_pt_regs) - offsetof(struct pt_regs, > > trap)); > > +#else > > membuf_write(, >thread.regs->orig_gpr3, > > sizeof(struct user_pt_regs) - > > offsetof(struct pt_regs, orig_gpr3)); > > +#endif > > return membuf_zero(, ELF_NGREG * sizeof(unsigned long) - > > sizeof(struct user_pt_regs)); > > } > > Probably yes. > > This mirrors the previous patch I sent > (https://lore.kernel.org/lkml/20190917143753.ga12...@redhat.com/) > and this is exactly what I tried to avoid, we can make a simpler fix now. > > But let me repeat, I agree with any fix even if imp my version simplifies the > code, just > commit this change and lets forget this problem. > > Oleg.
Re: [PATCH v2 04/19] powerpc/perf: move perf irq/nmi handling details into traps.c
> On 11-Nov-2020, at 3:13 PM, Nicholas Piggin wrote: > > This is required in order to allow more significant differences between > NMI type interrupt handlers and regular asynchronous handlers. > > Signed-off-by: Nicholas Piggin > --- > arch/powerpc/kernel/traps.c | 31 +++- > arch/powerpc/perf/core-book3s.c | 35 ++-- > arch/powerpc/perf/core-fsl-emb.c | 25 --- > 3 files changed, 32 insertions(+), 59 deletions(-) > > diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c > index 902fcbd1a778..7dda72eb97cc 100644 > --- a/arch/powerpc/kernel/traps.c > +++ b/arch/powerpc/kernel/traps.c > @@ -1919,11 +1919,40 @@ void vsx_unavailable_tm(struct pt_regs *regs) > } > #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ > > -void performance_monitor_exception(struct pt_regs *regs) > +static void performance_monitor_exception_nmi(struct pt_regs *regs) > +{ > + nmi_enter(); > + > + __this_cpu_inc(irq_stat.pmu_irqs); > + > + perf_irq(regs); > + > + nmi_exit(); > +} > + > +static void performance_monitor_exception_async(struct pt_regs *regs) > { > + irq_enter(); > + > __this_cpu_inc(irq_stat.pmu_irqs); > > perf_irq(regs); > + > + irq_exit(); > +} > + > +void performance_monitor_exception(struct pt_regs *regs) > +{ > + /* > + * On 64-bit, if perf interrupts hit in a local_irq_disable > + * (soft-masked) region, we consider them as NMIs. This is required to > + * prevent hash faults on user addresses when reading callchains (and > + * looks better from an irq tracing perspective). 
> + */ > + if (IS_ENABLED(CONFIG_PPC64) && unlikely(arch_irq_disabled_regs(regs))) > + performance_monitor_exception_nmi(regs); > + else > + performance_monitor_exception_async(regs); > } > > #ifdef CONFIG_PPC_ADV_DEBUG_REGS > diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c > index 08643cba1494..9fd8cae09218 100644 > --- a/arch/powerpc/perf/core-book3s.c > +++ b/arch/powerpc/perf/core-book3s.c > @@ -109,10 +109,6 @@ static inline void perf_read_regs(struct pt_regs *regs) > { > regs->result = 0; > } > -static inline int perf_intr_is_nmi(struct pt_regs *regs) > -{ > - return 0; > -} > > static inline int siar_valid(struct pt_regs *regs) > { > @@ -328,15 +324,6 @@ static inline void perf_read_regs(struct pt_regs *regs) > regs->result = use_siar; > } > > -/* > - * If interrupts were soft-disabled when a PMU interrupt occurs, treat > - * it as an NMI. > - */ > -static inline int perf_intr_is_nmi(struct pt_regs *regs) > -{ > - return (regs->softe & IRQS_DISABLED); > -} > - Hi Nick, arch_irq_disabled_regs checks the regs->softe value, if it has IRQS_DISABLED set. Core-book3s is also using same logic in perf_intr_is_nmi to check if it is an NMI. With the changes in this patch, if I understood correctly, we will do the irq/nmi handling in traps.c rather than doing it in the PMI interrupt handler. But can you please help to understand better on what is the perf weirdness (sometimes NMI, sometimes not) mentioned in the cover letter that we are fixing with this change ? Thanks Athira > /* > * On processors like P7+ that have the SIAR-Valid bit, marked instructions > * must be sampled only if the SIAR-valid bit is set. 
> @@ -2224,7 +2211,6 @@ static void __perf_event_interrupt(struct pt_regs *regs) > struct perf_event *event; > unsigned long val[8]; > int found, active; > - int nmi; > > if (cpuhw->n_limited) > freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5), > @@ -2232,18 +2218,6 @@ static void __perf_event_interrupt(struct pt_regs > *regs) > > perf_read_regs(regs); > > - /* > - * If perf interrupts hit in a local_irq_disable (soft-masked) region, > - * we consider them as NMIs. This is required to prevent hash faults on > - * user addresses when reading callchains. See the NMI test in > - * do_hash_page. > - */ > - nmi = perf_intr_is_nmi(regs); > - if (nmi) > - nmi_enter(); > - else > - irq_enter(); > - > /* Read all the PMCs since we'll need them a bunch of times */ > for (i = 0; i < ppmu->n_counter; ++i) > val[i] = read_pmc(i + 1); > @@ -2289,8 +2263,8 @@ static void __perf_event_interrupt(struct pt_regs *regs) > } > } > } > - if (!found && !nmi && printk_ratelimit()) > - printk(KERN_WARNING "Can't find PMC that caused IRQ\n"); > + if (unlikely(!found) && !arch_irq_disabled_regs(regs)) > + printk_ratelimited(KERN_WARNING "Can't find PMC that caused > IRQ\n"); > > /* >* Reset MMCR0 to its normal value. This will set PMXE and > @@ -2300,11 +2274,6 @@ static void __perf_event_interrupt(struct pt_regs > *regs) >* we get back out of this interrupt. >*/ >
Re: Linux kernel: powerpc: RTAS calls can be used to compromise kernel integrity
On 9/10/20 12:20 pm, Andrew Donnellan wrote: The Linux kernel for powerpc has an issue with the Run-Time Abstraction Services (RTAS) interface, allowing root (or CAP_SYS_ADMIN users) in a VM to overwrite some parts of memory, including kernel memory. This issue impacts guests running on top of PowerVM or KVM hypervisors (pseries platform), and does *not* impact bare-metal machines (powernv platform). CVE-2020-27777 has been assigned. -- Andrew Donnellan OzLabs, ADL Canberra a...@linux.ibm.com IBM Australia Limited
Re: [PATCH] powerpc/perf: Fix crash with 'is_sier_available' when pmu is not set
> On 23-Nov-2020, at 4:49 PM, Michael Ellerman wrote: > > Hi Athira, > > Athira Rajeev writes: >> On systems without any platform specific PMU driver support registered or >> Generic Compat PMU support registered, > > The compat PMU is registered just like other PMUs, so I don't see how we > can crash like this if the compat PMU is active? > > ie. if we're using the compat PMU then ppmu will be non-NULL and point > to generic_compat_pmu. Hi Michael, Thanks for checking the patch. Crash happens on systems which neither has compat PMU support registered nor has Platform specific PMU. This happens when the distro do not have either the PMU driver support for that platform or the generic "compat-mode" performance monitoring driver support. So in such cases since compat PMU is in-active, ppmu is not set and results in crash. Sorry for the confusion with my first line. I will correct it. > >> running 'perf record' with >> —intr-regs will crash ( perf record -I ). >> >> The relevant portion from crash logs and Call Trace: >> >> Unable to handle kernel paging request for data at address 0x0068 >> Faulting instruction address: 0xc013eb18 >> Oops: Kernel access of bad area, sig: 11 [#1] >> CPU: 2 PID: 13435 Comm: kill Kdump: loaded Not tainted >> 4.18.0-193.el8.ppc64le #1 >> NIP: c013eb18 LR: c0139f2c CTR: c0393d80 >> REGS: c004a07ab4f0 TRAP: 0300 Not tainted (4.18.0-193.el8.ppc64le) >> NIP [c013eb18] is_sier_available+0x18/0x30 >> LR [c0139f2c] perf_reg_value+0x6c/0xb0 >> Call Trace: >> [c004a07ab770] [c004a07ab7c8] 0xc004a07ab7c8 (unreliable) >> [c004a07ab7a0] [c03aa77c] perf_output_sample+0x60c/0xac0 >> [c004a07ab840] [c03ab3f0] perf_event_output_forward+0x70/0xb0 >> [c004a07ab8c0] [c039e208] __perf_event_overflow+0x88/0x1a0 >> [c004a07ab910] [c039e42c] perf_swevent_hrtimer+0x10c/0x1d0 >> [c004a07abc50] [c0228b9c] __hrtimer_run_queues+0x17c/0x480 >> [c004a07abcf0] [c022aaf4] hrtimer_interrupt+0x144/0x520 >> [c004a07abdd0] [c002a864] timer_interrupt+0x104/0x2f0 >> 
[c004a07abe30] [c00091c4] decrementer_common+0x114/0x120 >> >> When perf record session started with "-I" option, capture registers > ^ > is > >> via intr-regs, > > "intr-regs" is just the full name for the -I option, so that kind of > repeats itself. > >> on each sample ‘is_sier_available()'i is called to check > ^ > extra i > > The single quotes around is_sier_available() aren't necessary IMO. > >> for the SIER ( Sample Instruction Event Register) availability in the >^ >stray space >> platform. This function in core-book3s access 'ppmu->flags'. If platform > ^ ^ > esa >> specific pmu driver is not registered, ppmu is set to null and accessing > ^^ > PMU NULL >> its members results in crash. Patch fixes this by returning false in >^ >a >> 'is_sier_available()' if 'ppmu' is not set. > > Use the imperative mood for the last sentence which says what the patch > does: > > Fix the crash by returning false in is_sier_available() if ppmu is not set. Sure, I will make all these changes as suggested. Thanks Athira > > >> Fixes: 333804dc3b7a ("powerpc/perf: Update perf_regs structure to include >> SIER") >> Reported-by: Sachin Sant >> Signed-off-by: Athira Rajeev >> --- >> arch/powerpc/perf/core-book3s.c | 3 +++ >> 1 file changed, 3 insertions(+) >> >> diff --git a/arch/powerpc/perf/core-book3s.c >> b/arch/powerpc/perf/core-book3s.c >> index 08643cb..1de4770 100644 >> --- a/arch/powerpc/perf/core-book3s.c >> +++ b/arch/powerpc/perf/core-book3s.c >> @@ -137,6 +137,9 @@ static void pmao_restore_workaround(bool ebb) { } >> >> bool is_sier_available(void) >> { >> +if (!ppmu) >> +return false; >> + >> if (ppmu->flags & PPMU_HAS_SIER) >> return true; >> >> -- >> 1.8.3.1 > > > cheers
Re: [PATCH] powerpc/perf: Fix crash with 'is_sier_available' when pmu is not set
Hi Athira, Athira Rajeev writes: > On systems without any platform specific PMU driver support registered or > Generic Compat PMU support registered, The compat PMU is registered just like other PMUs, so I don't see how we can crash like this if the compat PMU is active? ie. if we're using the compat PMU then ppmu will be non-NULL and point to generic_compat_pmu. > running 'perf record' with > —intr-regs will crash ( perf record -I ). > > The relevant portion from crash logs and Call Trace: > > Unable to handle kernel paging request for data at address 0x0068 > Faulting instruction address: 0xc013eb18 > Oops: Kernel access of bad area, sig: 11 [#1] > CPU: 2 PID: 13435 Comm: kill Kdump: loaded Not tainted 4.18.0-193.el8.ppc64le > #1 > NIP: c013eb18 LR: c0139f2c CTR: c0393d80 > REGS: c004a07ab4f0 TRAP: 0300 Not tainted (4.18.0-193.el8.ppc64le) > NIP [c013eb18] is_sier_available+0x18/0x30 > LR [c0139f2c] perf_reg_value+0x6c/0xb0 > Call Trace: > [c004a07ab770] [c004a07ab7c8] 0xc004a07ab7c8 (unreliable) > [c004a07ab7a0] [c03aa77c] perf_output_sample+0x60c/0xac0 > [c004a07ab840] [c03ab3f0] perf_event_output_forward+0x70/0xb0 > [c004a07ab8c0] [c039e208] __perf_event_overflow+0x88/0x1a0 > [c004a07ab910] [c039e42c] perf_swevent_hrtimer+0x10c/0x1d0 > [c004a07abc50] [c0228b9c] __hrtimer_run_queues+0x17c/0x480 > [c004a07abcf0] [c022aaf4] hrtimer_interrupt+0x144/0x520 > [c004a07abdd0] [c002a864] timer_interrupt+0x104/0x2f0 > [c004a07abe30] [c00091c4] decrementer_common+0x114/0x120 > > When perf record session started with "-I" option, capture registers ^ is > via intr-regs, "intr-regs" is just the full name for the -I option, so that kind of repeats itself. > on each sample ‘is_sier_available()'i is called to check ^ extra i The single quotes around is_sier_available() aren't necessary IMO. > for the SIER ( Sample Instruction Event Register) availability in the ^ stray space > platform. This function in core-book3s access 'ppmu->flags'. 
If platform ^ ^ esa > specific pmu driver is not registered, ppmu is set to null and accessing ^^ PMU NULL > its members results in crash. Patch fixes this by returning false in ^ a > 'is_sier_available()' if 'ppmu' is not set. Use the imperative mood for the last sentence which says what the patch does: Fix the crash by returning false in is_sier_available() if ppmu is not set. > Fixes: 333804dc3b7a ("powerpc/perf: Update perf_regs structure to include > SIER") > Reported-by: Sachin Sant > Signed-off-by: Athira Rajeev > --- > arch/powerpc/perf/core-book3s.c | 3 +++ > 1 file changed, 3 insertions(+) > > diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c > index 08643cb..1de4770 100644 > --- a/arch/powerpc/perf/core-book3s.c > +++ b/arch/powerpc/perf/core-book3s.c > @@ -137,6 +137,9 @@ static void pmao_restore_workaround(bool ebb) { } > > bool is_sier_available(void) > { > + if (!ppmu) > + return false; > + > if (ppmu->flags & PPMU_HAS_SIER) > return true; > > -- > 1.8.3.1 cheers
Re: [PATCH 1/3] perf/core: Flush PMU internal buffers for per-CPU events
Namhyung Kim writes: > Hi Peter and Kan, > > (Adding PPC folks) > > On Tue, Nov 17, 2020 at 2:01 PM Namhyung Kim wrote: >> >> Hello, >> >> On Thu, Nov 12, 2020 at 4:54 AM Liang, Kan wrote: >> > >> > >> > >> > On 11/11/2020 11:25 AM, Peter Zijlstra wrote: >> > > On Mon, Nov 09, 2020 at 09:49:31AM -0500, Liang, Kan wrote: >> > > >> > >> - When the large PEBS was introduced (9c964efa4330), the sched_task() >> > >> should >> > >> be invoked to flush the PEBS buffer in each context switch. However, The >> > >> perf_sched_events in account_event() is not updated accordingly. The >> > >> perf_event_task_sched_* never be invoked for a pure per-CPU context. >> > >> Only >> > >> per-task event works. >> > >> At that time, the perf_pmu_sched_task() is outside of >> > >> perf_event_context_sched_in/out. It means that perf has to double >> > >> perf_pmu_disable() for per-task event. >> > > >> > >> - The patch 1 tries to fix broken per-CPU events. The CPU context >> > >> cannot be >> > >> retrieved from the task->perf_event_ctxp. So it has to be tracked in the >> > >> sched_cb_list. Yes, the code is very similar to the original codes, but >> > >> it >> > >> is actually the new code for per-CPU events. The optimization for >> > >> per-task >> > >> events is still kept. >> > >>For the case, which has both a CPU context and a task context, yes, >> > >> the >> > >> __perf_pmu_sched_task() in this patch is not invoked. Because the >> > >> sched_task() only need to be invoked once in a context switch. The >> > >> sched_task() will be eventually invoked in the task context. >> > > >> > > The thing is; your first two patches rely on PERF_ATTACH_SCHED_CB and >> > > only set that for large pebs. Are you sure the other users (Intel LBR >> > > and PowerPC BHRB) don't need it? >> > >> > I didn't set it for LBR, because the perf_sched_events is always enabled >> > for LBR. But, yes, we should explicitly set the PERF_ATTACH_SCHED_CB >> > for LBR. 
>> > >> > if (has_branch_stack(event)) >> > inc = true; >> > >> > > >> > > If they indeed do not require the pmu::sched_task() callback for CPU >> > > events, then I still think the whole perf_sched_cb_{inc,dec}() interface >> > >> > No, LBR requires the pmu::sched_task() callback for CPU events. >> > >> > Now, The LBR registers have to be reset in sched in even for CPU events. >> > >> > To fix the shorter LBR callstack issue for CPU events, we also need to >> > save/restore LBRs in pmu::sched_task(). >> > https://lore.kernel.org/lkml/1578495789-95006-4-git-send-email-kan.li...@linux.intel.com/ >> > >> > > is confusing at best. >> > > >> > > Can't we do something like this instead? >> > > >> > I think the below patch may have two issues. >> > - PERF_ATTACH_SCHED_CB is required for LBR (maybe PowerPC BHRB as well) >> > now. >> > - We may disable the large PEBS later if not all PEBS events support >> > large PEBS. The PMU need a way to notify the generic code to decrease >> > the nr_sched_task. >> >> Any updates on this? I've reviewed and tested Kan's patches >> and they all look good. >> >> Maybe we can talk to PPC folks to confirm the BHRB case? > > Can we move this forward? I saw patch 3/3 also adds PERF_ATTACH_SCHED_CB > for PowerPC too. But it'd be nice if ppc folks can confirm the change. Sorry I've read the whole thread, but I'm still not entirely sure I understand the question. cheers
Re: [PATCH] powerpc/perf: Fix crash with 'is_sier_available' when pmu is not set
> When perf record session started with "-I" option, capture registers > via intr-regs, on each sample ‘is_sier_available()' is called to check > for the SIER ( Sample Instruction Event Register) availability in the > platform. This function in core-book3s access 'ppmu->flags'. If platform > specific pmu driver is not registered, ppmu is set to null and accessing > its members results in crash. Patch fixes this by returning false in > 'is_sier_available()' if 'ppmu' is not set. > > Fixes: 333804dc3b7a ("powerpc/perf: Update perf_regs structure to include > SIER") > Reported-by: Sachin Sant > Signed-off-by: Athira Rajeev Tested-by: Sachin Sant Thanks -Sachin
Re: [PATCH V2 4/5] ocxl: Add mmu notifier
On 20/11/2020 18:32, Christophe Lombard wrote: Add invalidate_range mmu notifier, when required (ATSD access of MMIO registers is available), to initiate TLB invalidation commands. For the time being, the ATSD0 set of registers is used by default. The pasid and bdf values have to be configured in the Process Element Entry. The PEE must be set up to match the BDF/PASID of the AFU. Signed-off-by: Christophe Lombard --- drivers/misc/ocxl/link.c | 58 +++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c index 20444db8a2bb..100bdfe9ec37 100644 --- a/drivers/misc/ocxl/link.c +++ b/drivers/misc/ocxl/link.c @@ -2,8 +2,10 @@ // Copyright 2017 IBM Corp. #include #include +#include #include #include +#include #include #include #include @@ -33,6 +35,7 @@ #define SPA_PE_VALID 0x8000 +struct ocxl_link; struct pe_data { struct mm_struct *mm; @@ -41,6 +44,8 @@ struct pe_data { /* opaque pointer to be passed to the above callback */ void *xsl_err_data; struct rcu_head rcu; + struct ocxl_link *link; + struct mmu_notifier mmu_notifier; }; struct spa { @@ -83,6 +88,8 @@ struct ocxl_link { int domain; int bus; int dev; + void __iomem *arva; /* ATSD register virtual address */ + spinlock_t atsd_lock; /* to serialize shootdowns */ atomic_t irq_available; struct spa *spa; void *platform_data; @@ -403,6 +410,11 @@ static int alloc_link(struct pci_dev *dev, int PE_mask, struct ocxl_link **out_l if (rc) goto err_xsl_irq; + rc = pnv_ocxl_map_lpar(dev, mfspr(SPRN_LPID), 0, + >arva); + if (!rc) + spin_lock_init(>atsd_lock); + We could use a comment to say that if arva = 0, then we don't need mmio shootdowns and we rely on hardware snooping. Also, we could always initialize the spin lock, it doesn't hurt and make the code more readable. 
Fred *out_link = link; return 0; @@ -454,6 +466,11 @@ static void release_xsl(struct kref *ref) { struct ocxl_link *link = container_of(ref, struct ocxl_link, ref); + if (link->arva) { + pnv_ocxl_unmap_lpar(>arva); + link->arva = NULL; + } + list_del(>list); /* call platform code before releasing data */ pnv_ocxl_spa_release(link->platform_data); @@ -470,6 +487,26 @@ void ocxl_link_release(struct pci_dev *dev, void *link_handle) } EXPORT_SYMBOL_GPL(ocxl_link_release); +static void invalidate_range(struct mmu_notifier *mn, +struct mm_struct *mm, +unsigned long start, unsigned long end) +{ + struct pe_data *pe_data = container_of(mn, struct pe_data, mmu_notifier); + struct ocxl_link *link = pe_data->link; + unsigned long addr, pid, page_size = PAGE_SIZE; + + pid = mm->context.id; + + spin_lock(>atsd_lock); + for (addr = start; addr < end; addr += page_size) + pnv_ocxl_tlb_invalidate(>arva, pid, addr); + spin_unlock(>atsd_lock); +} + +static const struct mmu_notifier_ops ocxl_mmu_notifier_ops = { + .invalidate_range = invalidate_range, +}; + static u64 calculate_cfg_state(bool kernel) { u64 state; @@ -526,6 +563,8 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr, pe_data->mm = mm; pe_data->xsl_err_cb = xsl_err_cb; pe_data->xsl_err_data = xsl_err_data; + pe_data->link = link; + pe_data->mmu_notifier.ops = _mmu_notifier_ops; memset(pe, 0, sizeof(struct ocxl_process_element)); pe->config_state = cpu_to_be64(calculate_cfg_state(pidr == 0)); @@ -542,8 +581,16 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr, * by the nest MMU. If we have a kernel context, TLBIs are * already global. */ - if (mm) + if (mm) { mm_context_add_copro(mm); + if (link->arva) { + /* Use MMIO registers for the TLB Invalidate +* operations. +*/ + mmu_notifier_register(_data->mmu_notifier, mm); + } + } + /* * Barrier is to make sure PE is visible in the SPA before it * is used by the device. 
It also helps with the global TLBI @@ -674,6 +721,15 @@ int ocxl_link_remove_pe(void *link_handle, int pasid) WARN(1, "Couldn't find pe data when removing PE\n"); } else { if (pe_data->mm) { + if (link->arva) { + mmu_notifier_unregister(&pe_data->mmu_notifier, + pe_data->mm); +
Re: [PATCH V2 3/5] ocxl: Update the Process Element Entry
On 20/11/2020 18:32, Christophe Lombard wrote: To complete the MMIO based mechanism, the fields: PASID, bus, device and function of the Process Element Entry have to be filled. (See OpenCAPI Power Platform Architecture document) Hypervisor Process Element Entry Word 0 1 7 8 .. 12 13 ..15 16 19 20 ... 31 0 OSL Configuration State (0:31) 1 OSL Configuration State (32:63) 2 PASID |Reserved 3 Bus | Device|Function |Reserved 4 Reserved 5 Reserved 6 Signed-off-by: Christophe Lombard --- drivers/misc/ocxl/context.c | 4 +++- drivers/misc/ocxl/link.c | 4 +++- drivers/misc/ocxl/ocxl_internal.h | 4 +++- drivers/scsi/cxlflash/ocxl_hw.c | 6 -- include/misc/ocxl.h | 2 +- 5 files changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c index c21f65a5c762..9eb0d93b01c6 100644 --- a/drivers/misc/ocxl/context.c +++ b/drivers/misc/ocxl/context.c @@ -70,6 +70,7 @@ int ocxl_context_attach(struct ocxl_context *ctx, u64 amr, struct mm_struct *mm) { int rc; unsigned long pidr = 0; + struct pci_dev *dev; // Locks both status & tidr mutex_lock(>status_mutex); @@ -81,8 +82,9 @@ int ocxl_context_attach(struct ocxl_context *ctx, u64 amr, struct mm_struct *mm) if (mm) pidr = mm->context.id; + dev = to_pci_dev(ctx->afu->fn->dev.parent); rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid, pidr, ctx->tidr, - amr, mm, xsl_fault_error, ctx); + amr, pci_dev_id(dev), mm, xsl_fault_error, ctx); if (rc) goto out; diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c index fd73d3bc0eb6..20444db8a2bb 100644 --- a/drivers/misc/ocxl/link.c +++ b/drivers/misc/ocxl/link.c @@ -494,7 +494,7 @@ static u64 calculate_cfg_state(bool kernel) } int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr, - u64 amr, struct mm_struct *mm, + u64 amr, u64 bdf, struct mm_struct *mm, bdf could/should be a u16, since that's per the PCI spec. 
Fred void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr), void *xsl_err_data) { @@ -529,6 +529,8 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr, memset(pe, 0, sizeof(struct ocxl_process_element)); pe->config_state = cpu_to_be64(calculate_cfg_state(pidr == 0)); + pe->pasid = cpu_to_be32(pasid << (31 - 19)); + pe->bdf = cpu_to_be32(bdf << (31 - 15)); pe->lpid = cpu_to_be32(mfspr(SPRN_LPID)); pe->pid = cpu_to_be32(pidr); pe->tid = cpu_to_be32(tidr); diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h index 0bad0a123af6..c9ce2af21d6f 100644 --- a/drivers/misc/ocxl/ocxl_internal.h +++ b/drivers/misc/ocxl/ocxl_internal.h @@ -84,7 +84,9 @@ struct ocxl_context { struct ocxl_process_element { __be64 config_state; - __be32 reserved1[11]; + __be32 pasid; + __be32 bdf; + __be32 reserved1[9]; __be32 lpid; __be32 tid; __be32 pid; diff --git a/drivers/scsi/cxlflash/ocxl_hw.c b/drivers/scsi/cxlflash/ocxl_hw.c index e4e0d767b98e..244fc27215dc 100644 --- a/drivers/scsi/cxlflash/ocxl_hw.c +++ b/drivers/scsi/cxlflash/ocxl_hw.c @@ -329,6 +329,7 @@ static int start_context(struct ocxlflash_context *ctx) struct ocxl_hw_afu *afu = ctx->hw_afu; struct ocxl_afu_config *acfg = >acfg; void *link_token = afu->link_token; + struct pci_dev *pdev = afu->pdev; struct device *dev = afu->dev; bool master = ctx->master; struct mm_struct *mm; @@ -360,8 +361,9 @@ static int start_context(struct ocxlflash_context *ctx) mm = current->mm; } - rc = ocxl_link_add_pe(link_token, ctx->pe, pid, 0, 0, mm, - ocxlflash_xsl_fault, ctx); + rc = ocxl_link_add_pe(link_token, ctx->pe, pid, 0, 0, + pci_dev_id(pdev), mm, ocxlflash_xsl_fault, + ctx); if (unlikely(rc)) { dev_err(dev, "%s: ocxl_link_add_pe failed rc=%d\n", __func__, rc); diff --git a/include/misc/ocxl.h b/include/misc/ocxl.h index e013736e275d..d0f101f428dd 100644 --- a/include/misc/ocxl.h +++ b/include/misc/ocxl.h @@ -447,7 +447,7 @@ void ocxl_link_release(struct pci_dev *dev, void *link_handle); * 
defined */ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr, - u64 amr, struct mm_struct *mm, + u64 amr, u64 bdf, struct mm_struct *mm, void (*xsl_err_cb)(void
Re: [PATCH V2 2/5] ocxl: Initiate a TLB invalidate command
On 20/11/2020 18:32, Christophe Lombard wrote: When a TLB Invalidate is required for the Logical Partition, the following sequence has to be performed: 1. Load MMIO ATSD AVA register with the necessary value, if required. 2. Write the MMIO ATSD launch register to initiate the TLB Invalidate command. 3. Poll the MMIO ATSD status register to determine when the TLB Invalidate has been completed. Signed-off-by: Christophe Lombard --- arch/powerpc/include/asm/pnv-ocxl.h | 50 arch/powerpc/platforms/powernv/ocxl.c | 55 +++ 2 files changed, 105 insertions(+) diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h index 3f38aed7100c..9c90e87e7659 100644 --- a/arch/powerpc/include/asm/pnv-ocxl.h +++ b/arch/powerpc/include/asm/pnv-ocxl.h @@ -3,12 +3,59 @@ #ifndef _ASM_PNV_OCXL_H #define _ASM_PNV_OCXL_H +#include #include #define PNV_OCXL_TL_MAX_TEMPLATE63 #define PNV_OCXL_TL_BITS_PER_RATE 4 #define PNV_OCXL_TL_RATE_BUF_SIZE ((PNV_OCXL_TL_MAX_TEMPLATE+1) * PNV_OCXL_TL_BITS_PER_RATE / 8) +#define PNV_OCXL_ATSD_TIMEOUT 1 + +/* TLB Management Instructions */ +#define PNV_OCXL_ATSD_LNCH 0x00 +/* Radix Invalidate */ +#define PNV_OCXL_ATSD_LNCH_R PPC_BIT(0) +/* Radix Invalidation Control + * 0b00 Just invalidate TLB. + * 0b01 Invalidate just Page Walk Cache. + * 0b10 Invalidate TLB, Page Walk Cache, and any + * caching of Partition and Process Table Entries. + */ +#define PNV_OCXL_ATSD_LNCH_RIC PPC_BITMASK(1, 2) +/* Number and Page Size of translations to be invalidated */ +#define PNV_OCXL_ATSD_LNCH_LPPPC_BITMASK(3, 10) +/* Invalidation Criteria + * 0b00 Invalidate just the target VA. + * 0b01 Invalidate matching PID. 
+ */ +#define PNV_OCXL_ATSD_LNCH_IS PPC_BITMASK(11, 12) +/* 0b1: Process Scope, 0b0: Partition Scope */ +#define PNV_OCXL_ATSD_LNCH_PRS PPC_BIT(13) +/* Invalidation Flag */ +#define PNV_OCXL_ATSD_LNCH_B PPC_BIT(14) +/* Actual Page Size to be invalidated + * 000 4KB + * 101 64KB + * 001 2MB + * 010 1GB + */ +#define PNV_OCXL_ATSD_LNCH_AP PPC_BITMASK(15, 17) +/* Defines the large page select + * L=0b0 for 4KB pages + * L=0b1 for large pages + */ +#define PNV_OCXL_ATSD_LNCH_L PPC_BIT(18) +/* Process ID */ +#define PNV_OCXL_ATSD_LNCH_PID PPC_BITMASK(19, 38) +/* NoFlush – Assumed to be 0b0 */ +#define PNV_OCXL_ATSD_LNCH_F PPC_BIT(39) +#define PNV_OCXL_ATSD_LNCH_OCAPI_SLBI PPC_BIT(40) +#define PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON PPC_BIT(41) +#define PNV_OCXL_ATSD_AVA 0x08 +#define PNV_OCXL_ATSD_AVA_AVA PPC_BITMASK(0, 51) +#define PNV_OCXL_ATSD_STAT 0x10 + int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled, u16 *supported); int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count); @@ -31,4 +78,7 @@ int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle); int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid, uint64_t lpcr, void __iomem **arva); void pnv_ocxl_unmap_lpar(void __iomem **arva); +void pnv_ocxl_tlb_invalidate(void __iomem **arva, +unsigned long pid, +unsigned long addr); #endif /* _ASM_PNV_OCXL_H */ diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c index bc20cf867900..07878496954b 100644 --- a/arch/powerpc/platforms/powernv/ocxl.c +++ b/arch/powerpc/platforms/powernv/ocxl.c @@ -531,3 +531,58 @@ void pnv_ocxl_unmap_lpar(void __iomem **arva) iounmap(*arva); } EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_lpar); + +void pnv_ocxl_tlb_invalidate(void __iomem **arva, Similarly to the previous patch, there's no reason why arva should be a double-pointer.
+unsigned long pid, +unsigned long addr) +{ + unsigned long timeout = jiffies + (HZ * PNV_OCXL_ATSD_TIMEOUT); + uint64_t val = 0ull; + int pend; + + if (!(*arva)) + return; + + if (addr) { + /* load Abbreviated Virtual Address register with +* the necessary value +*/ + val |= FIELD_PREP(PNV_OCXL_ATSD_AVA_AVA, addr >> (63-51)); + out_be64(*arva + PNV_OCXL_ATSD_AVA, val); + } + + /* Write access initiates a shoot down to initiate the +* TLB Invalidate command +*/ + val = PNV_OCXL_ATSD_LNCH_R; + if (addr) { + val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_RIC, 0b00); + val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b00); + } else { + val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_RIC, 0b10); + val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b01); + val |= PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON; +
Re: [PATCH V2 1/5] ocxl: Assign a register set to a Logical Partition
On 20/11/2020 18:32, Christophe Lombard wrote: Platform specific function to assign a register set to a Logical Partition. The "ibm,mmio-atsd" property, provided by the firmware, contains the 16 base ATSD physical addresses (ATSD0 through ATSD15) of the set of MMIO registers (XTS MMIO ATSDx LPARID/AVA/launch/status register). For the time being, the ATSD0 set of registers is used by default. Signed-off-by: Christophe Lombard --- arch/powerpc/include/asm/pnv-ocxl.h | 3 ++ arch/powerpc/platforms/powernv/ocxl.c | 48 +++ 2 files changed, 51 insertions(+) diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h index d37ededca3ee..3f38aed7100c 100644 --- a/arch/powerpc/include/asm/pnv-ocxl.h +++ b/arch/powerpc/include/asm/pnv-ocxl.h @@ -28,4 +28,7 @@ int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, void **p void pnv_ocxl_spa_release(void *platform_data); int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle); +int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid, + uint64_t lpcr, void __iomem **arva); +void pnv_ocxl_unmap_lpar(void __iomem **arva); #endif /* _ASM_PNV_OCXL_H */ diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c index ecdad219d704..bc20cf867900 100644 --- a/arch/powerpc/platforms/powernv/ocxl.c +++ b/arch/powerpc/platforms/powernv/ocxl.c @@ -483,3 +483,51 @@ int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle) return rc; } EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache); + +int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid, + uint64_t lpcr, void __iomem **arva) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + u64 mmio_atsd; + int rc; + + /* ATSD physical address. +* ATSD LAUNCH register: write access initiates a shoot down to +* initiate the TLB Invalidate command. 
+*/ + rc = of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", + 0, &mmio_atsd); + if (rc) { + dev_info(&dev->dev, "No available ATSD found\n"); + return rc; + } + + /* Assign a register set to a Logical Partition and MMIO ATSD +* LPARID register to the required value. +*/ + if (mmio_atsd) If we don't have the "ibm,mmio-atsd", i.e. on P9, then we've already exited above. So why not consider mmio_atsd as an error? + rc = opal_npu_map_lpar(phb->opal_id, pci_dev_id(dev), + lparid, lpcr); + if (rc) { + dev_err(&dev->dev, "Error mapping device to LPAR: %d\n", rc); + return rc; + } + + if (mmio_atsd) { Same here + *arva = ioremap(mmio_atsd, 24); + if (!(*arva)) { + dev_warn(&dev->dev, "ioremap failed - mmio_atsd: %#llx\n", mmio_atsd); + rc = -ENOMEM; + } + } + + return rc; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_map_lpar); + +void pnv_ocxl_unmap_lpar(void __iomem **arva) The arva argument doesn't need to be a double pointer. Void * is enough. Fred +{ + iounmap(*arva); +} +EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_lpar);
[PATCH] powerpc/perf: Fix crash with 'is_sier_available' when pmu is not set
On systems without any platform specific PMU driver support registered or Generic Compat PMU support registered, running 'perf record' with --intr-regs will crash ( perf record -I ). The relevant portion from crash logs and Call Trace: Unable to handle kernel paging request for data at address 0x0068 Faulting instruction address: 0xc013eb18 Oops: Kernel access of bad area, sig: 11 [#1] CPU: 2 PID: 13435 Comm: kill Kdump: loaded Not tainted 4.18.0-193.el8.ppc64le #1 NIP: c013eb18 LR: c0139f2c CTR: c0393d80 REGS: c004a07ab4f0 TRAP: 0300 Not tainted (4.18.0-193.el8.ppc64le) NIP [c013eb18] is_sier_available+0x18/0x30 LR [c0139f2c] perf_reg_value+0x6c/0xb0 Call Trace: [c004a07ab770] [c004a07ab7c8] 0xc004a07ab7c8 (unreliable) [c004a07ab7a0] [c03aa77c] perf_output_sample+0x60c/0xac0 [c004a07ab840] [c03ab3f0] perf_event_output_forward+0x70/0xb0 [c004a07ab8c0] [c039e208] __perf_event_overflow+0x88/0x1a0 [c004a07ab910] [c039e42c] perf_swevent_hrtimer+0x10c/0x1d0 [c004a07abc50] [c0228b9c] __hrtimer_run_queues+0x17c/0x480 [c004a07abcf0] [c022aaf4] hrtimer_interrupt+0x144/0x520 [c004a07abdd0] [c002a864] timer_interrupt+0x104/0x2f0 [c004a07abe30] [c00091c4] decrementer_common+0x114/0x120 When a perf record session is started with the "-I" option to capture registers via --intr-regs, on each sample 'is_sier_available()' is called to check for the SIER (Sample Instruction Event Register) availability in the platform. This function in core-book3s accesses 'ppmu->flags'. If a platform specific pmu driver is not registered, ppmu is set to null and accessing its members results in a crash. The patch fixes this by returning false in 'is_sier_available()' if 'ppmu' is not set.
Fixes: 333804dc3b7a ("powerpc/perf: Update perf_regs structure to include SIER") Reported-by: Sachin Sant Signed-off-by: Athira Rajeev --- arch/powerpc/perf/core-book3s.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 08643cb..1de4770 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -137,6 +137,9 @@ static void pmao_restore_workaround(bool ebb) { } bool is_sier_available(void) { + if (!ppmu) + return false; + if (ppmu->flags & PPMU_HAS_SIER) return true; -- 1.8.3.1
[PATCH v4] dt-bindings: misc: convert fsl,qoriq-mc from txt to YAML
From: Ionut-robert Aron Convert fsl,qoriq-mc to YAML in order to automate the verification process of dts files. In addition, update MAINTAINERS accordingly and, while at it, add some missing files. Signed-off-by: Ionut-robert Aron [laurentiu.tu...@nxp.com: update MINTAINERS, updates & fixes in schema] Signed-off-by: Laurentiu Tudor --- Changes in v4: - use $ref to point to fsl,qoriq-mc-dpmac binding Changes in v3: - dropped duplicated "fsl,qoriq-mc-dpmac" schema and replaced with reference to it - fixed a dt_binding_check warning Changes in v2: - fixed errors reported by yamllint - dropped multiple unnecessary quotes - used schema instead of text in description - added constraints on dpmac reg property .../devicetree/bindings/misc/fsl,qoriq-mc.txt | 196 -- .../bindings/misc/fsl,qoriq-mc.yaml | 186 + .../ethernet/freescale/dpaa2/overview.rst | 5 +- MAINTAINERS | 4 +- 4 files changed, 193 insertions(+), 198 deletions(-) delete mode 100644 Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt create mode 100644 Documentation/devicetree/bindings/misc/fsl,qoriq-mc.yaml diff --git a/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt b/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt deleted file mode 100644 index 7b486d4985dc.. --- a/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt +++ /dev/null @@ -1,196 +0,0 @@ -* Freescale Management Complex - -The Freescale Management Complex (fsl-mc) is a hardware resource -manager that manages specialized hardware objects used in -network-oriented packet processing applications. After the fsl-mc -block is enabled, pools of hardware resources are available, such as -queues, buffer pools, I/O interfaces. These resources are building -blocks that can be used to create functional hardware objects/devices -such as network interfaces, crypto accelerator instances, L2 switches, -etc. 
- -For an overview of the DPAA2 architecture and fsl-mc bus see: -Documentation/networking/device_drivers/ethernet/freescale/dpaa2/overview.rst - -As described in the above overview, all DPAA2 objects in a DPRC share the -same hardware "isolation context" and a 10-bit value called an ICID -(isolation context id) is expressed by the hardware to identify -the requester. - -The generic 'iommus' property is insufficient to describe the relationship -between ICIDs and IOMMUs, so an iommu-map property is used to define -the set of possible ICIDs under a root DPRC and how they map to -an IOMMU. - -For generic IOMMU bindings, see -Documentation/devicetree/bindings/iommu/iommu.txt. - -For arm-smmu binding, see: -Documentation/devicetree/bindings/iommu/arm,smmu.yaml. - -The MSI writes are accompanied by sideband data which is derived from the ICID. -The msi-map property is used to associate the devices with both the ITS -controller and the sideband data which accompanies the writes. - -For generic MSI bindings, see -Documentation/devicetree/bindings/interrupt-controller/msi.txt. - -For GICv3 and GIC ITS bindings, see: -Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml. - -Required properties: - -- compatible -Value type: -Definition: Must be "fsl,qoriq-mc". A Freescale Management Complex -compatible with this binding must have Block Revision -Registers BRR1 and BRR2 at offset 0x0BF8 and 0x0BFC in -the MC control register region. - -- reg -Value type: -Definition: A standard property. Specifies one or two regions -defining the MC's registers: - - -the first region is the command portal for the -this machine and must always be present - - -the second region is the MC control registers. This -region may not be present in some scenarios, such -as in the device tree presented to a virtual machine. - -- ranges -Value type: -Definition: A standard property. Defines the mapping between the child -MC address space and the parent system address space. 
- -The MC address space is defined by 3 components: - - -Valid values for region type are - 0x0 - MC portals - 0x1 - QBMAN portals - -- #address-cells -Value type: -Definition: Must be 3. (see definition in 'ranges' property) - -- #size-cells -Value type: -Definition: Must be 1. - -Sub-nodes: - -The fsl-mc node may optionally have dpmac sub-nodes that describe -the relationship between the Ethernet MACs which belong to the MC -and the Ethernet PHYs on the system board. - -The dpmac nodes must be under a node named "dpmacs" which contains -the