[PATCH] net/ethernet/freescale: Fix incorrect IS_ERR_VALUE macro usages

2020-11-23 Thread Wei Li
The IS_ERR_VALUE() macro should be used only with unsigned long values.
In particular, it gives incorrect results for shorter unsigned types on
64-bit machines.
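
As a minimal sketch (not part of the patch) of why the bare macro misses an
error when the allocator's return value is stored in a 32-bit field on a
64-bit kernel, and why the (unsigned long)(int) cast restores the intended
behaviour:

	/* IS_ERR_VALUE(x) is roughly: (unsigned long)(x) >= (unsigned long)-MAX_ERRNO */
	u32 offset = (u32)-ENOMEM;	/* 0xfffffff4, an encoded error value */

	IS_ERR_VALUE(offset);				/* false: zero-extends to 0x00000000fffffff4 */
	IS_ERR_VALUE((unsigned long)(int)offset);	/* true: sign-extends to 0xfffffffffffffff4 */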

Fixes: 4c35630ccda5 ("[POWERPC] Change rheap functions to use ulongs instead of pointers")
Signed-off-by: Wei Li 
---
 drivers/net/ethernet/freescale/ucc_geth.c | 30 +++
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/freescale/ucc_geth.c 
b/drivers/net/ethernet/freescale/ucc_geth.c
index 714b501be7d0..8656d9be256a 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -286,7 +286,7 @@ static int fill_init_enet_entries(struct ucc_geth_private 
*ugeth,
else {
init_enet_offset =
qe_muram_alloc(thread_size, thread_alignment);
-   if (IS_ERR_VALUE(init_enet_offset)) {
+   if (IS_ERR_VALUE((unsigned long)(int)init_enet_offset)) {
if (netif_msg_ifup(ugeth))
pr_err("Can not allocate DPRAM memory\n");
qe_put_snum((u8) snum);
@@ -2223,7 +2223,7 @@ static int ucc_geth_alloc_tx(struct ucc_geth_private 
*ugeth)
ugeth->tx_bd_ring_offset[j] =
qe_muram_alloc(length,
   UCC_GETH_TX_BD_RING_ALIGNMENT);
-   if (!IS_ERR_VALUE(ugeth->tx_bd_ring_offset[j]))
+   if (!IS_ERR_VALUE((unsigned long)(int)ugeth->tx_bd_ring_offset[j]))
ugeth->p_tx_bd_ring[j] =
(u8 __iomem *) qe_muram_addr(ugeth->
 tx_bd_ring_offset[j]);
@@ -2300,7 +2300,7 @@ static int ucc_geth_alloc_rx(struct ucc_geth_private 
*ugeth)
ugeth->rx_bd_ring_offset[j] =
qe_muram_alloc(length,
   UCC_GETH_RX_BD_RING_ALIGNMENT);
-   if (!IS_ERR_VALUE(ugeth->rx_bd_ring_offset[j]))
+   if (!IS_ERR_VALUE((unsigned long)(int)ugeth->rx_bd_ring_offset[j]))
ugeth->p_rx_bd_ring[j] =
(u8 __iomem *) qe_muram_addr(ugeth->
 rx_bd_ring_offset[j]);
@@ -2510,7 +2510,7 @@ static int ucc_geth_startup(struct ucc_geth_private 
*ugeth)
ugeth->tx_glbl_pram_offset =
qe_muram_alloc(sizeof(struct ucc_geth_tx_global_pram),
   UCC_GETH_TX_GLOBAL_PRAM_ALIGNMENT);
-   if (IS_ERR_VALUE(ugeth->tx_glbl_pram_offset)) {
+   if (IS_ERR_VALUE((unsigned long)(int)ugeth->tx_glbl_pram_offset)) {
if (netif_msg_ifup(ugeth))
pr_err("Can not allocate DPRAM memory for 
p_tx_glbl_pram\n");
return -ENOMEM;
@@ -2530,7 +2530,7 @@ static int ucc_geth_startup(struct ucc_geth_private 
*ugeth)
   sizeof(struct ucc_geth_thread_data_tx) +
   32 * (numThreadsTxNumerical == 1),
   UCC_GETH_THREAD_DATA_ALIGNMENT);
-   if (IS_ERR_VALUE(ugeth->thread_dat_tx_offset)) {
+   if (IS_ERR_VALUE((unsigned long)(int)ugeth->thread_dat_tx_offset)) {
if (netif_msg_ifup(ugeth))
pr_err("Can not allocate DPRAM memory for 
p_thread_data_tx\n");
return -ENOMEM;
@@ -2557,7 +2557,7 @@ static int ucc_geth_startup(struct ucc_geth_private 
*ugeth)
qe_muram_alloc(ug_info->numQueuesTx *
   sizeof(struct ucc_geth_send_queue_qd),
   UCC_GETH_SEND_QUEUE_QUEUE_DESCRIPTOR_ALIGNMENT);
-   if (IS_ERR_VALUE(ugeth->send_q_mem_reg_offset)) {
+   if (IS_ERR_VALUE((unsigned long)(int)ugeth->send_q_mem_reg_offset)) {
if (netif_msg_ifup(ugeth))
pr_err("Can not allocate DPRAM memory for 
p_send_q_mem_reg\n");
return -ENOMEM;
@@ -2597,7 +2597,7 @@ static int ucc_geth_startup(struct ucc_geth_private 
*ugeth)
ugeth->scheduler_offset =
qe_muram_alloc(sizeof(struct ucc_geth_scheduler),
   UCC_GETH_SCHEDULER_ALIGNMENT);
-   if (IS_ERR_VALUE(ugeth->scheduler_offset)) {
+   if (IS_ERR_VALUE((unsigned long)(int)ugeth->scheduler_offset)) {
if (netif_msg_ifup(ugeth))
pr_err("Can not allocate DPRAM memory for 
p_scheduler\n");
return -ENOMEM;
@@ -2644,7 +2644,7 @@ static int ucc_geth_startup(struct ucc_geth_private 
*ugeth)
qe_muram_alloc(sizeof
   (struct 
ucc_geth_tx_firmware_statistics_pram),
   

[PATCH] net: fs_enet: Fix incorrect IS_ERR_VALUE macro usages

2020-11-23 Thread Wei Li
The IS_ERR_VALUE() macro should be used only with unsigned long values.
In particular, it gives incorrect results for shorter unsigned types on
64-bit machines.

Fixes: 976de6a8c304 ("fs_enet: Be an of_platform device when CONFIG_PPC_CPM_NEW_BINDING is set.")
Fixes: 4c35630ccda5 ("[POWERPC] Change rheap functions to use ulongs instead of pointers")
Signed-off-by: Wei Li 
---
 drivers/net/ethernet/freescale/fs_enet/mac-fcc.c | 2 +-
 drivers/net/ethernet/freescale/fs_enet/mac-scc.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c 
b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c
index b47490be872c..e2117ad46130 100644
--- a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c
+++ b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c
@@ -107,7 +107,7 @@ static int do_pd_setup(struct fs_enet_private *fep)
 
fep->fcc.mem = (void __iomem *)cpm2_immr;
fpi->dpram_offset = cpm_dpalloc(128, 32);
-   if (IS_ERR_VALUE(fpi->dpram_offset)) {
+   if (IS_ERR_VALUE((unsigned long)(int)fpi->dpram_offset)) {
ret = fpi->dpram_offset;
goto out_fcccp;
}
diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c 
b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c
index 64300ac13e02..90f82df0b1bb 100644
--- a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c
+++ b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c
@@ -136,7 +136,7 @@ static int allocate_bd(struct net_device *dev)
 
fep->ring_mem_addr = cpm_dpalloc((fpi->tx_ring + fpi->rx_ring) *
 sizeof(cbd_t), 8);
-   if (IS_ERR_VALUE(fep->ring_mem_addr))
+   if (IS_ERR_VALUE((unsigned long)(int)fep->ring_mem_addr))
return -ENOMEM;
 
fep->ring_base = (void __iomem __force*)
-- 
2.17.1



Re: [PATCH v2 00/19] Add generic vdso_base tracking

2020-11-23 Thread Christophe Leroy




On 24/11/2020 at 01:29, Dmitry Safonov wrote:

v2 Changes:
- Rename user_landing to vdso_base as it tracks vDSO VMA start address,
   rather than the explicit address to land (Andy)
- Reword and don't use "new-execed" and "new-born" task (Andy)
- Fix failures reported by build robot

Started from discussion [1], where it was noted that currently a couple of
architectures support mremap() for vdso/sigpage, but not munmap().
If an application maps something over the old location of vdso/sigpage,
it will land there after processing a signal (good luck!)

Patches set is based on linux-next (next-20201123) and it depends on
changes in x86/cleanups (those reclaim TIF_IA32/TIF_X32) and also
on my changes in akpm (fixing several mremap() issues).


I have a series that cleans up VDSO init on powerpc and migrates powerpc to 
_install_special_mapping() (patch 10 of the series).


https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=204396=%2A=both

I'm wondering how we should coordinate with your series for merging.

I guess your series will also imply removal of arch_unmap() ? see 
https://elixir.bootlin.com/linux/v5.10-rc4/source/arch/powerpc/include/asm/mmu_context.h#L262




Logically, the patches set divides on:
- patch   1: a cleanup for patches in x86/cleanups
- patches  2-11: cleanups for arch_setup_additional_pages()
- patches 12-13: x86 signal changes for unmapped vdso
- patches 14-19: provide generic vdso_base in mm_struct

In the end, besides cleanups, it's now more predictable what happens for
applications with unmapped vdso on architectures that support .mremap()
for vdso/sigpage.

I'm aware of only one user that unmaps vdso - Valgrind [2].
(there possibly are more, but this one is "special", it unmaps vdso, but
  not vvar, which confuses CRIU [Checkpoint Restore In Userspace], that's
  why I'm aware of it)

Patches as a .git branch:
https://github.com/0x7f454c46/linux/tree/setup_additional_pages-v2

v1 Link:
https://lore.kernel.org/lkml/20201108051730.2042693-1-d...@arista.com/

[1]: 
https://lore.kernel.org/linux-arch/cajwjo6zanqykshbq+3b+fi_vt80mtrzev5yreqawx-l8j8x...@mail.gmail.com/
[2]: https://github.com/checkpoint-restore/criu/issues/488



Christophe


[PATCH kernel v4 8/8] powerpc/pci: Remove LSI mappings on device teardown

2020-11-23 Thread Alexey Kardashevskiy
From: Oliver O'Halloran 

When a passthrough IO adapter is removed from a pseries machine using hash
MMU and the XIVE interrupt mode, the POWER hypervisor expects the guest OS
to clear all page table entries related to the adapter. If some are still
present, the RTAS call which isolates the PCI slot returns error 9001
"valid outstanding translations" and the removal of the IO adapter fails.
This is because when the PHBs are scanned, Linux automatically maps the
INTx interrupts into the Linux interrupt number space, but these mappings
are never removed.

This problem can be fixed by adding the corresponding unmap operation when
the device is removed. There's no pcibios_* hook for the remove case, but
the same effect can be achieved using a bus notifier.

Signed-off-by: Oliver O'Halloran 
Reviewed-by: Cédric Le Goater 
Signed-off-by: Alexey Kardashevskiy 
---
 arch/powerpc/kernel/pci-common.c | 21 +
 1 file changed, 21 insertions(+)

diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index be108616a721..95f4e173368a 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -404,6 +404,27 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
return 0;
 }
 
+static int ppc_pci_unmap_irq_line(struct notifier_block *nb,
+  unsigned long action, void *data)
+{
+   struct pci_dev *pdev = to_pci_dev(data);
+
+   if (action == BUS_NOTIFY_DEL_DEVICE)
+   irq_dispose_mapping(pdev->irq);
+
+   return NOTIFY_DONE;
+}
+
+static struct notifier_block ppc_pci_unmap_irq_notifier = {
+   .notifier_call = ppc_pci_unmap_irq_line,
+};
+
+static int ppc_pci_register_irq_notifier(void)
+{
+   return bus_register_notifier(&pci_bus_type, &ppc_pci_unmap_irq_notifier);
+}
+arch_initcall(ppc_pci_register_irq_notifier);
+
 /*
  * Platform support for /proc/bus/pci/X/Y mmap()s.
  *  -- paulus.
-- 
2.17.1



[PATCH kernel v4 6/8] genirq/irqdomain: Move hierarchical IRQ cleanup to kobject_release

2020-11-23 Thread Alexey Kardashevskiy
This moves the hierarchical domain's IRQ cleanup into the kobject release
hook, making irq_domain_free_irqs() as simple as a kobject_put().

Signed-off-by: Alexey Kardashevskiy 
---
 kernel/irq/irqdomain.c | 43 +-
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 4779d912bb86..a0a81cc6c524 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -863,21 +863,9 @@ EXPORT_SYMBOL_GPL(irq_create_of_mapping);
  */
 void irq_dispose_mapping(unsigned int virq)
 {
-   struct irq_data *irq_data = irq_get_irq_data(virq);
-   struct irq_domain *domain;
+   struct irq_desc *desc = irq_to_desc(virq);
 
-   if (!virq || !irq_data)
-   return;
-
-   domain = irq_data->domain;
-   if (WARN_ON(domain == NULL))
-   return;
-
-   if (irq_domain_is_hierarchy(domain)) {
-   irq_domain_free_irqs(virq, 1);
-   } else {
-   irq_free_desc(virq);
-   }
+   kobject_put(&desc->kobj);
 }
 EXPORT_SYMBOL_GPL(irq_dispose_mapping);
 
@@ -1396,6 +1384,19 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain 
*domain,
return domain->ops->alloc(domain, irq_base, nr_irqs, arg);
 }
 
+static void irq_domain_hierarchy_free_desc(struct irq_desc *desc)
+{
+   unsigned int virq = desc->irq_data.irq;
+   struct irq_data *data = irq_get_irq_data(virq);
+
+   mutex_lock(&irq_domain_mutex);
+   irq_domain_remove_irq(virq);
+   irq_domain_free_irqs_hierarchy(data->domain, virq, 1);
+   mutex_unlock(&irq_domain_mutex);
+
+   irq_domain_free_irq_data(virq, 1);
+}
+
 int __irq_domain_alloc_irqs_data(struct irq_domain *domain, int virq,
 unsigned int nr_irqs, int node, void *arg,
 const struct irq_affinity_desc *affinity)
@@ -1430,7 +1431,10 @@ int __irq_domain_alloc_irqs_data(struct irq_domain 
*domain, int virq,
}
 
for (i = 0; i < nr_irqs; i++) {
+   struct irq_desc *desc = irq_to_desc(virq + i);
+
irq_domain_insert_irq(virq + i);
+   desc->free_irq = irq_domain_hierarchy_free_desc;
}
mutex_unlock(&irq_domain_mutex);
 
@@ -1675,14 +1679,11 @@ void irq_domain_free_irqs(unsigned int virq, unsigned 
int nr_irqs)
 "NULL pointer, cannot free irq\n"))
return;
 
-   mutex_lock(&irq_domain_mutex);
-   for (i = 0; i < nr_irqs; i++)
-   irq_domain_remove_irq(virq + i);
-   irq_domain_free_irqs_hierarchy(data->domain, virq, nr_irqs);
-   mutex_unlock(&irq_domain_mutex);
+   for (i = 0; i < nr_irqs; i++) {
+   struct irq_desc *desc = irq_to_desc(virq + i);
 
-   irq_domain_free_irq_data(virq, nr_irqs);
-   irq_free_descs(virq, nr_irqs);
+   kobject_put(&desc->kobj);
+   }
 }
 
 /**
-- 
2.17.1



[PATCH kernel v4 5/8] genirq: Add free_irq hook for IRQ descriptor and use for mapping disposal

2020-11-23 Thread Alexey Kardashevskiy
We want to make the irq_desc.kobj's release hook free associated resources
but we do not want to pollute the irqdesc code with domains.

This adds a free_irq hook which is called when the last reference to
the descriptor is dropped.

The first user is mapped irqs. This potentially can break the existing
users; however they seem to do the right thing and call dispose once
per mapping.

Signed-off-by: Alexey Kardashevskiy 
---
 include/linux/irqdesc.h|  1 +
 include/linux/irqdomain.h  |  2 --
 include/linux/irqhandler.h |  1 +
 kernel/irq/irqdesc.c   |  3 +++
 kernel/irq/irqdomain.c | 14 --
 5 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 5745491303e0..6d44cb6a20ad 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -57,6 +57,7 @@ struct irq_desc {
struct irq_data irq_data;
unsigned int __percpu   *kstat_irqs;
irq_flow_handler_t  handle_irq;
+   irq_free_handler_t  free_irq;
struct irqaction*action;/* IRQ action list */
unsigned intstatus_use_accessors;
unsigned intcore_internal_state__do_not_mess_with_it;
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index a353b93ddf9e..ccca87cd3d15 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -381,8 +381,6 @@ extern int irq_domain_associate(struct irq_domain *domain, 
unsigned int irq,
 extern void irq_domain_associate_many(struct irq_domain *domain,
  unsigned int irq_base,
  irq_hw_number_t hwirq_base, int count);
-extern void irq_domain_disassociate(struct irq_domain *domain,
-   unsigned int irq);
 
 extern unsigned int irq_create_mapping(struct irq_domain *host,
   irq_hw_number_t hwirq);
diff --git a/include/linux/irqhandler.h b/include/linux/irqhandler.h
index c30f454a9518..3dbc2bb764f3 100644
--- a/include/linux/irqhandler.h
+++ b/include/linux/irqhandler.h
@@ -10,5 +10,6 @@
 struct irq_desc;
 struct irq_data;
 typedef void (*irq_flow_handler_t)(struct irq_desc *desc);
+typedef void (*irq_free_handler_t)(struct irq_desc *desc);
 
 #endif
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 75374b7944b5..071363da8688 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -427,6 +427,9 @@ static void irq_kobj_release(struct kobject *kobj)
struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
unsigned int irq = desc->irq_data.irq;
 
+   if (desc->free_irq)
+   desc->free_irq(desc);
+
irq_remove_debugfs_entry(desc);
unregister_irq_proc(irq, desc);
 
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 805478f81d96..4779d912bb86 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -485,7 +485,7 @@ static void irq_domain_set_mapping(struct irq_domain 
*domain,
}
 }
 
-void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq)
+static void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq)
 {
struct irq_data *irq_data = irq_get_irq_data(irq);
irq_hw_number_t hwirq;
@@ -582,6 +582,13 @@ void irq_domain_associate_many(struct irq_domain *domain, 
unsigned int irq_base,
 }
 EXPORT_SYMBOL_GPL(irq_domain_associate_many);
 
+static void irq_mapped_free_desc(struct irq_desc *desc)
+{
+   unsigned int virq = desc->irq_data.irq;
+
+   irq_domain_disassociate(desc->irq_data.domain, virq);
+}
+
 /**
  * irq_create_direct_mapping() - Allocate an irq for direct mapping
  * @domain: domain to allocate the irq for or NULL for default domain
@@ -638,6 +645,7 @@ unsigned int irq_create_mapping(struct irq_domain *domain,
 {
struct device_node *of_node;
int virq;
+   struct irq_desc *desc;
 
pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq);
 
@@ -674,6 +682,9 @@ unsigned int irq_create_mapping(struct irq_domain *domain,
pr_debug("irq %lu on domain %s mapped to virtual irq %u\n",
hwirq, of_node_full_name(of_node), virq);
 
+   desc = irq_to_desc(virq);
+   desc->free_irq = irq_mapped_free_desc;
+
return virq;
 }
 EXPORT_SYMBOL_GPL(irq_create_mapping);
@@ -865,7 +876,6 @@ void irq_dispose_mapping(unsigned int virq)
if (irq_domain_is_hierarchy(domain)) {
irq_domain_free_irqs(virq, 1);
} else {
-   irq_domain_disassociate(domain, virq);
irq_free_desc(virq);
}
 }
-- 
2.17.1



[PATCH kernel v4 3/8] genirq/irqdomain: Drop unused realloc parameter from __irq_domain_alloc_irqs

2020-11-23 Thread Alexey Kardashevskiy
The two previous patches made @realloc obsolete. This finishes removing it.

Signed-off-by: Alexey Kardashevskiy 
---
 include/linux/irqdomain.h   | 4 +---
 arch/x86/kernel/apic/io_apic.c  | 2 +-
 drivers/gpio/gpiolib.c  | 1 -
 drivers/irqchip/irq-armada-370-xp.c | 2 +-
 drivers/irqchip/irq-bcm2836.c   | 3 +--
 drivers/irqchip/irq-gic-v3.c| 3 +--
 drivers/irqchip/irq-gic-v4.c| 6 ++
 drivers/irqchip/irq-gic.c   | 3 +--
 drivers/irqchip/irq-ixp4xx.c| 1 -
 kernel/irq/ipi.c| 2 +-
 kernel/irq/irqdomain.c  | 4 +---
 kernel/irq/msi.c| 2 +-
 12 files changed, 11 insertions(+), 22 deletions(-)

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 6cc37bba9951..a353b93ddf9e 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -475,7 +475,6 @@ extern int __irq_domain_alloc_irqs_data(struct irq_domain 
*domain, int virq,
const struct irq_affinity_desc *affinity);
 extern int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
   unsigned int nr_irqs, int node, void *arg,
-  bool realloc,
   const struct irq_affinity_desc *affinity);
 extern void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs);
 extern int irq_domain_activate_irq(struct irq_data *irq_data, bool early);
@@ -484,8 +483,7 @@ extern void irq_domain_deactivate_irq(struct irq_data 
*irq_data);
 static inline int irq_domain_alloc_irqs(struct irq_domain *domain,
unsigned int nr_irqs, int node, void *arg)
 {
-   return __irq_domain_alloc_irqs(domain, -1, nr_irqs, node, arg, false,
-  NULL);
+   return __irq_domain_alloc_irqs(domain, -1, nr_irqs, node, arg, NULL);
 }
 
 extern int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index df9c0ab3a119..5b45f0874571 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -973,7 +973,7 @@ static int alloc_irq_from_domain(struct irq_domain *domain, 
int ioapic, u32 gsi,
if (irq == -1 || !legacy)
return __irq_domain_alloc_irqs(domain, irq, 1,
   ioapic_alloc_attr_node(info),
-  info, false, NULL);
+  info, NULL);
 
return __irq_domain_alloc_irqs_data(domain, irq, 1,
ioapic_alloc_attr_node(info),
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 089ddcaa9bc6..b7cfecb5c701 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1059,7 +1059,6 @@ static void gpiochip_set_hierarchical_irqchip(struct 
gpio_chip *gc,
  1,
  NUMA_NO_NODE,
  &fwspec,
- false,
  NULL);
if (ret < 0) {
chip_err(gc,
diff --git a/drivers/irqchip/irq-armada-370-xp.c 
b/drivers/irqchip/irq-armada-370-xp.c
index d7eb2e93db8f..bf17eb312669 100644
--- a/drivers/irqchip/irq-armada-370-xp.c
+++ b/drivers/irqchip/irq-armada-370-xp.c
@@ -431,7 +431,7 @@ static __init void armada_xp_ipi_init(struct device_node 
*node)
 
irq_domain_update_bus_token(ipi_domain, DOMAIN_BUS_IPI);
base_ipi = __irq_domain_alloc_irqs(ipi_domain, -1, IPI_DOORBELL_END,
-  NUMA_NO_NODE, NULL, false, NULL);
+  NUMA_NO_NODE, NULL, NULL);
if (WARN_ON(!base_ipi))
return;
 
diff --git a/drivers/irqchip/irq-bcm2836.c b/drivers/irqchip/irq-bcm2836.c
index cbc7c740e4dc..fe9ff90940d3 100644
--- a/drivers/irqchip/irq-bcm2836.c
+++ b/drivers/irqchip/irq-bcm2836.c
@@ -269,8 +269,7 @@ static void __init bcm2836_arm_irqchip_smp_init(void)
irq_domain_update_bus_token(ipi_domain, DOMAIN_BUS_IPI);
 
base_ipi = __irq_domain_alloc_irqs(ipi_domain, -1, BITS_PER_MBOX,
-  NUMA_NO_NODE, NULL,
-  false, NULL);
+  NUMA_NO_NODE, NULL, NULL);
 
if (WARN_ON(!base_ipi))
return;
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 16fecc0febe8..ff20fd54921f 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -1163,8 +1163,7 @@ static void __init gic_smp_init(void)
 
/* Register all 8 non-secure SGIs */
base_sgi = 

[PATCH kernel v4 7/8] genirq/irqdomain: Reference irq_desc for already mapped irqs

2020-11-23 Thread Alexey Kardashevskiy
This takes a reference on the irq_desc when an already-mapped interrupt is
requested to be mapped again. This happens for PCI legacy interrupts, where 4
interrupts are shared among all devices on the same PCI host bus adapter.

From now on, the users shall call irq_dispose_mapping() for every
irq_create_fwspec_mapping(). Most (all?) users do not bother with
disposing though so it is not very likely to break many things.
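
A hypothetical caller sketch (not code from this series) of the now-balanced
create/dispose pattern for a shared legacy interrupt, assuming fwspec is an
already-filled struct irq_fwspec describing the shared INTx line:

	unsigned int virq_a = irq_create_fwspec_mapping(&fwspec); /* creates the mapping, holds a reference */
	unsigned int virq_b = irq_create_fwspec_mapping(&fwspec); /* returns the same virq, takes another reference */

	irq_dispose_mapping(virq_b);	/* drops one reference, the mapping stays */
	irq_dispose_mapping(virq_a);	/* last reference: descriptor and hardware mapping are released */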

Signed-off-by: Alexey Kardashevskiy 
---
 kernel/irq/irqdomain.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index a0a81cc6c524..07f4bde87de5 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -663,7 +663,9 @@ unsigned int irq_create_mapping(struct irq_domain *domain,
/* Check if mapping already exists */
virq = irq_find_mapping(domain, hwirq);
if (virq) {
+   desc = irq_to_desc(virq);
pr_debug("-> existing mapping on virq %d\n", virq);
+   kobject_get(&desc->kobj);
return virq;
}
 
@@ -762,6 +764,7 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec 
*fwspec)
irq_hw_number_t hwirq;
unsigned int type = IRQ_TYPE_NONE;
int virq;
+   struct irq_desc *desc;
 
if (fwspec->fwnode) {
domain = irq_find_matching_fwspec(fwspec, DOMAIN_BUS_WIRED);
@@ -798,8 +801,11 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec 
*fwspec)
 * current trigger type then we are done so return the
 * interrupt number.
 */
-   if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq))
+   if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq)) {
+   desc = irq_to_desc(virq);
+   kobject_get(&desc->kobj);
return virq;
+   }
 
/*
 * If the trigger type has not been set yet, then set
@@ -811,6 +817,8 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec 
*fwspec)
return 0;
 
irqd_set_trigger_type(irq_data, type);
+   desc = irq_to_desc(virq);
+   kobject_get(&desc->kobj);
return virq;
}
 
-- 
2.17.1



[PATCH kernel v4 0/8] genirq/irqdomain: Add reference counting to IRQs

2020-11-23 Thread Alexey Kardashevskiy
This is another attempt to add reference counting to IRQ
descriptors; or - more to the point - reuse already existing
kobj from irq_desc. This allows the same IRQ to be used several
times (such as legacy PCI INTx) and when disposing those, only
the last reference drop clears the hardware mappings.
Domains do not add references to irq_desc as the whole point of
this exercise is to move actual cleanup in hardware to
the last reference drop. This only changes sparse interrupts
(no idea about the other case yet).

No changelog as it is all completely rewritten. I am still running
tests but I hope this demonstrates the idea.

Some context from Cedric:
The background context for such a need is that the POWER9 and POWER10
processors have a new XIVE interrupt controller which uses MMIO pages
for interrupt management. Each interrupt has a pair of pages which are
required to be unmapped in some environment, like PHB removal. And so,
all interrupts need to be unmmaped.

1/8 .. 3/8 remove the confusing "realloc" parameter, which is not strictly
required, but I was touching this code anyway and legacy interrupts should
probably use the new counting anyway;

4/8 .. 6/8 is reordering irq_desc disposal;

7/8 adds extra references (probably missed other places);

8/8 is the fix for the original XIVE bug; it is here for demonstration.

I am cc'ing ppc list so people can pull the patches from that patchworks.

This is based on sha1
418baf2c28f3 Linus Torvalds "Linux 5.10-rc5".

and pushed out to
https://github.com/aik/linux/commits/irqs
sha1 3955f97c448242f6a

Please comment. Thanks.


Alexey Kardashevskiy (7):
  genirq/ipi:  Simplify irq_reserve_ipi
  genirq/irqdomain: Clean legacy IRQ allocation
  genirq/irqdomain: Drop unused realloc parameter from
__irq_domain_alloc_irqs
  genirq: Free IRQ descriptor via embedded kobject
  genirq: Add free_irq hook for IRQ descriptor and use for mapping
disposal
  genirq/irqdomain: Move hierarchical IRQ cleanup to kobject_release
  genirq/irqdomain: Reference irq_desc for already mapped irqs

Oliver O'Halloran (1):
  powerpc/pci: Remove LSI mappings on device teardown

 include/linux/irqdesc.h |   1 +
 include/linux/irqdomain.h   |   9 +-
 include/linux/irqhandler.h  |   1 +
 arch/powerpc/kernel/pci-common.c|  21 
 arch/x86/kernel/apic/io_apic.c  |  13 ++-
 drivers/gpio/gpiolib.c  |   1 -
 drivers/irqchip/irq-armada-370-xp.c |   2 +-
 drivers/irqchip/irq-bcm2836.c   |   3 +-
 drivers/irqchip/irq-gic-v3.c|   3 +-
 drivers/irqchip/irq-gic-v4.c|   6 +-
 drivers/irqchip/irq-gic.c   |   3 +-
 drivers/irqchip/irq-ixp4xx.c|   1 -
 kernel/irq/ipi.c|  16 +--
 kernel/irq/irqdesc.c|  45 +++-
 kernel/irq/irqdomain.c  | 160 +---
 kernel/irq/msi.c|   2 +-
 16 files changed, 158 insertions(+), 129 deletions(-)

-- 
2.17.1



[PATCH kernel v4 4/8] genirq: Free IRQ descriptor via embedded kobject

2020-11-23 Thread Alexey Kardashevskiy
At the moment the IRQ descriptor is freed via the free_desc() helper.
We want to add reference counting to IRQ descriptors and there is already
kobj embedded into irq_desc which we want to reuse.

This shuffles free_desc()/etc to make it simply call kobject_put() and
moves all the cleanup into the kobject_release hook.

As a bonus, we do not need irq_sysfs_del() as kobj removes itself from
sysfs if it knows that it was added.

This should cause no behavioral change.

Signed-off-by: Alexey Kardashevskiy 
---
 kernel/irq/irqdesc.c | 42 --
 1 file changed, 12 insertions(+), 30 deletions(-)

diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 1a7723604399..75374b7944b5 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -295,18 +295,6 @@ static void irq_sysfs_add(int irq, struct irq_desc *desc)
}
 }
 
-static void irq_sysfs_del(struct irq_desc *desc)
-{
-   /*
-* If irq_sysfs_init() has not yet been invoked (early boot), then
-* irq_kobj_base is NULL and the descriptor was never added.
-* kobject_del() complains about a object with no parent, so make
-* it conditional.
-*/
-   if (irq_kobj_base)
-   kobject_del(&desc->kobj);
-}
-
 static int __init irq_sysfs_init(void)
 {
struct irq_desc *desc;
@@ -337,7 +325,6 @@ static struct kobj_type irq_kobj_type = {
 };
 
 static void irq_sysfs_add(int irq, struct irq_desc *desc) {}
-static void irq_sysfs_del(struct irq_desc *desc) {}
 
 #endif /* CONFIG_SYSFS */
 
@@ -419,39 +406,34 @@ static struct irq_desc *alloc_desc(int irq, int node, 
unsigned int flags,
return NULL;
 }
 
-static void irq_kobj_release(struct kobject *kobj)
-{
-   struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
-
-   free_masks(desc);
-   free_percpu(desc->kstat_irqs);
-   kfree(desc);
-}
-
 static void delayed_free_desc(struct rcu_head *rhp)
 {
struct irq_desc *desc = container_of(rhp, struct irq_desc, rcu);
 
+   free_masks(desc);
+   free_percpu(desc->kstat_irqs);
+   kfree(desc);
+}
+
+static void free_desc(unsigned int irq)
+{
+   struct irq_desc *desc = irq_to_desc(irq);
+
kobject_put(&desc->kobj);
 }
 
-static void free_desc(unsigned int irq)
+static void irq_kobj_release(struct kobject *kobj)
 {
-   struct irq_desc *desc = irq_to_desc(irq);
+   struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
+   unsigned int irq = desc->irq_data.irq;
 
irq_remove_debugfs_entry(desc);
unregister_irq_proc(irq, desc);
 
/*
-* sparse_irq_lock protects also show_interrupts() and
-* kstat_irq_usr(). Once we deleted the descriptor from the
-* sparse tree we can free it. Access in proc will fail to
-* lookup the descriptor.
-*
 * The sysfs entry must be serialized against a concurrent
 * irq_sysfs_init() as well.
 */
-   irq_sysfs_del(desc);
delete_irq_desc(irq);
 
/*
-- 
2.17.1



[PATCH kernel v4 1/8] genirq/ipi: Simplify irq_reserve_ipi

2020-11-23 Thread Alexey Kardashevskiy
__irq_domain_alloc_irqs() can already handle virq==-1 and free
descriptors if it failed allocating hardware interrupts so let's skip
this extra step.

Signed-off-by: Alexey Kardashevskiy 
---
 kernel/irq/ipi.c | 16 +++-
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/kernel/irq/ipi.c b/kernel/irq/ipi.c
index 43e3d1be622c..1b2807318ea9 100644
--- a/kernel/irq/ipi.c
+++ b/kernel/irq/ipi.c
@@ -75,18 +75,12 @@ int irq_reserve_ipi(struct irq_domain *domain,
}
}
 
-   virq = irq_domain_alloc_descs(-1, nr_irqs, 0, NUMA_NO_NODE, NULL);
-   if (virq <= 0) {
-   pr_warn("Can't reserve IPI, failed to alloc descs\n");
-   return -ENOMEM;
-   }
-
-   virq = __irq_domain_alloc_irqs(domain, virq, nr_irqs, NUMA_NO_NODE,
-  (void *) dest, true, NULL);
+   virq = __irq_domain_alloc_irqs(domain, -1, nr_irqs, NUMA_NO_NODE,
+  (void *) dest, false, NULL);
 
if (virq <= 0) {
pr_warn("Can't reserve IPI, failed to alloc hw irqs\n");
-   goto free_descs;
+   return -EBUSY;
}
 
for (i = 0; i < nr_irqs; i++) {
@@ -96,10 +90,6 @@ int irq_reserve_ipi(struct irq_domain *domain,
irq_set_status_flags(virq + i, IRQ_NO_BALANCING);
}
return virq;
-
-free_descs:
-   irq_free_descs(virq, nr_irqs);
-   return -EBUSY;
 }
 
 /**
-- 
2.17.1



[PATCH kernel v4 2/8] genirq/irqdomain: Clean legacy IRQ allocation

2020-11-23 Thread Alexey Kardashevskiy
There are 10 users of __irq_domain_alloc_irqs() and only one - IOAPIC -
passes realloc==true. There is no obvious reason for handling this
specific case in the generic code.

This splits out __irq_domain_alloc_irqs_data() to make it clear what
IOAPIC does and makes __irq_domain_alloc_irqs() cleaner.

This should cause no behavioral change.

Signed-off-by: Alexey Kardashevskiy 
---
 include/linux/irqdomain.h  |  3 ++
 arch/x86/kernel/apic/io_apic.c | 13 +++--
 kernel/irq/irqdomain.c | 89 --
 3 files changed, 65 insertions(+), 40 deletions(-)

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 71535e87109f..6cc37bba9951 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -470,6 +470,9 @@ static inline struct irq_domain 
*irq_domain_add_hierarchy(struct irq_domain *par
   ops, host_data);
 }
 
+extern int __irq_domain_alloc_irqs_data(struct irq_domain *domain, int virq,
+   unsigned int nr_irqs, int node, void *arg,
+   const struct irq_affinity_desc *affinity);
 extern int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
   unsigned int nr_irqs, int node, void *arg,
   bool realloc,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 7b3c7e0d4a09..df9c0ab3a119 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -970,9 +970,14 @@ static int alloc_irq_from_domain(struct irq_domain 
*domain, int ioapic, u32 gsi,
return -1;
}
 
-   return __irq_domain_alloc_irqs(domain, irq, 1,
-  ioapic_alloc_attr_node(info),
-  info, legacy, NULL);
+   if (irq == -1 || !legacy)
+   return __irq_domain_alloc_irqs(domain, irq, 1,
+  ioapic_alloc_attr_node(info),
+  info, false, NULL);
+
+   return __irq_domain_alloc_irqs_data(domain, irq, 1,
+   ioapic_alloc_attr_node(info),
+   info, NULL);
 }
 
 /*
@@ -1006,7 +1011,7 @@ static int alloc_isa_irq_from_domain(struct irq_domain 
*domain,
return -ENOMEM;
} else {
info->flags |= X86_IRQ_ALLOC_LEGACY;
-   irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true,
+   irq = __irq_domain_alloc_irqs_data(domain, irq, 1, node, info,
  NULL);
if (irq >= 0) {
irq_data = irq_domain_get_irq_data(domain, irq);
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index cf8b374b892d..ca5c78366c85 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1386,6 +1386,51 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain 
*domain,
return domain->ops->alloc(domain, irq_base, nr_irqs, arg);
 }
 
+int __irq_domain_alloc_irqs_data(struct irq_domain *domain, int virq,
+unsigned int nr_irqs, int node, void *arg,
+const struct irq_affinity_desc *affinity)
+{
+   int i, ret;
+
+   if (domain == NULL) {
+   domain = irq_default_domain;
+   if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
+   return -EINVAL;
+   }
+
+   if (irq_domain_alloc_irq_data(domain, virq, nr_irqs)) {
+   pr_debug("cannot allocate memory for IRQ%d\n", virq);
+   ret = -ENOMEM;
+   goto out_free_irq_data;
+   }
+
+   mutex_lock(&irq_domain_mutex);
+   ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg);
+   if (ret < 0) {
+   mutex_unlock(&irq_domain_mutex);
+   goto out_free_irq_data;
+   }
+
+   for (i = 0; i < nr_irqs; i++) {
+   ret = irq_domain_trim_hierarchy(virq + i);
+   if (ret) {
+   mutex_unlock(&irq_domain_mutex);
+   goto out_free_irq_data;
+   }
+   }
+
+   for (i = 0; i < nr_irqs; i++) {
+   irq_domain_insert_irq(virq + i);
+   }
+   mutex_unlock(&irq_domain_mutex);
+
+   return virq;
+
+out_free_irq_data:
+   irq_domain_free_irq_data(virq, nr_irqs);
+   return ret;
+}
+
 /**
  * __irq_domain_alloc_irqs - Allocate IRQs from domain
  * @domain:domain to allocate from
@@ -1412,7 +1457,7 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, 
int irq_base,
unsigned int nr_irqs, int node, void *arg,
bool realloc, const struct irq_affinity_desc *affinity)
 {
-   int i, ret, virq;
+   int ret, virq;
 
if 

Re: [PATCH v3 0/6] ibmvfc: Protocol definition updates and new targetWWPN Support

2020-11-23 Thread Martin K. Petersen
On Tue, 17 Nov 2020 19:10:58 -0600, Tyrel Datwyler wrote:

> Several Management Datagrams (MADs) have been reversioned to add a targetWWPN
> field that is intended to better identify a target in place of the scsi_id.
> This patchset adds the new protocol definitions and implements support for
> using the new targetWWPN field and exposing the capability to the VIOS. This
> targetWWPN support is a prerequisite for upcoming channelization/MQ support.
> 
> changes in v3:
> * addressed field naming consistency in Patches 2 & 5 in response to [brking]
> * fixed commit log typos
> * fixed bad rebase of Patch 4 such that it now compiles
> 
> [...]

Applied to 5.11/scsi-queue, thanks!

[1/6] scsi: ibmvfc: Deduplicate common ibmvfc_cmd init code
  https://git.kernel.org/mkp/scsi/c/fad74a1be2db
[2/6] scsi: ibmvfc: Add new fields for version 2 of several MADs
  https://git.kernel.org/mkp/scsi/c/c16b8a6d8af1
[3/6] scsi: ibmvfc: Add helper for testing capability flags
  https://git.kernel.org/mkp/scsi/c/a318c2b71cce
[4/6] scsi: ibmvfc: Add FC payload retrieval routines for versioned vfcFrames
  https://git.kernel.org/mkp/scsi/c/5a9d16f71c26
[5/6] scsi: ibmvfc: Add support for target_wwpn field in v2 MADs and vfcFrame
  https://git.kernel.org/mkp/scsi/c/ebc7c74bd2dc
[6/6] scsi: ibmvfc: Advertise client support for targetWWPN using v2 commands
  https://git.kernel.org/mkp/scsi/c/e4af87b7079e

-- 
Martin K. Petersen  Oracle Linux Engineering


Re: [PATCH 1/3] perf/core: Flush PMU internal buffers for per-CPU events

2020-11-23 Thread Madhavan Srinivasan



On 11/24/20 10:21 AM, Namhyung Kim wrote:

Hello,

On Mon, Nov 23, 2020 at 8:00 PM Michael Ellerman  wrote:

Namhyung Kim  writes:

Hi Peter and Kan,

(Adding PPC folks)

On Tue, Nov 17, 2020 at 2:01 PM Namhyung Kim  wrote:

Hello,

On Thu, Nov 12, 2020 at 4:54 AM Liang, Kan  wrote:



On 11/11/2020 11:25 AM, Peter Zijlstra wrote:

On Mon, Nov 09, 2020 at 09:49:31AM -0500, Liang, Kan wrote:


- When the large PEBS was introduced (9c964efa4330), the sched_task() should
be invoked to flush the PEBS buffer in each context switch. However, The
perf_sched_events in account_event() is not updated accordingly. The
perf_event_task_sched_* never be invoked for a pure per-CPU context. Only
per-task event works.
 At that time, the perf_pmu_sched_task() is outside of
perf_event_context_sched_in/out. It means that perf has to double
perf_pmu_disable() for per-task event.
- The patch 1 tries to fix broken per-CPU events. The CPU context cannot be
retrieved from the task->perf_event_ctxp. So it has to be tracked in the
sched_cb_list. Yes, the code is very similar to the original codes, but it
is actually the new code for per-CPU events. The optimization for per-task
events is still kept.
For the case, which has both a CPU context and a task context, yes, the
__perf_pmu_sched_task() in this patch is not invoked. Because the
sched_task() only need to be invoked once in a context switch. The
sched_task() will be eventually invoked in the task context.

The thing is; your first two patches rely on PERF_ATTACH_SCHED_CB and
only set that for large pebs. Are you sure the other users (Intel LBR
and PowerPC BHRB) don't need it?

I didn't set it for LBR, because the perf_sched_events is always enabled
for LBR. But, yes, we should explicitly set the PERF_ATTACH_SCHED_CB
for LBR.

 if (has_branch_stack(event))
 inc = true;


If they indeed do not require the pmu::sched_task() callback for CPU
events, then I still think the whole perf_sched_cb_{inc,dec}() interface

No, LBR requires the pmu::sched_task() callback for CPU events.

Now, The LBR registers have to be reset in sched in even for CPU events.

To fix the shorter LBR callstack issue for CPU events, we also need to
save/restore LBRs in pmu::sched_task().
https://lore.kernel.org/lkml/1578495789-95006-4-git-send-email-kan.li...@linux.intel.com/


is confusing at best.

Can't we do something like this instead?


I think the below patch may have two issues.
- PERF_ATTACH_SCHED_CB is required for LBR (maybe PowerPC BHRB as well) now.
- We may disable the large PEBS later if not all PEBS events support
large PEBS. The PMU need a way to notify the generic code to decrease
the nr_sched_task.

Any updates on this?  I've reviewed and tested Kan's patches
and they all look good.

Maybe we can talk to PPC folks to confirm the BHRB case?

Can we move this forward?  I saw patch 3/3 also adds PERF_ATTACH_SCHED_CB
for PowerPC too.  But it'd be nice if ppc folks can confirm the change.

Sorry I've read the whole thread, but I'm still not entirely sure I
understand the question.

Thanks for your time and sorry about not being clear enough.

We found per-cpu events are not calling pmu::sched_task()
on context switches.  So PERF_ATTACH_SCHED_CB was
added to indicate the core logic that it needs to invoke the
callback.

The patch 3/3 added the flag to PPC (for BHRB) with other
changes (I think it should be split like in the patch 2/3) and
want to get ACKs from the PPC folks.


Sorry for delay.

I guess first it will be better to split the ppc change to a separate patch,

secondly, we are missing the changes needed in the power_pmu_bhrb_disable()

where perf_sched_cb_dec() needs the "state" to be included.


Maddy




Thanks,
Namhyung


Re: [PATCH 1/3] perf/core: Flush PMU internal buffers for per-CPU events

2020-11-23 Thread Namhyung Kim
Hello,

On Mon, Nov 23, 2020 at 8:00 PM Michael Ellerman  wrote:
>
> Namhyung Kim  writes:
> > Hi Peter and Kan,
> >
> > (Adding PPC folks)
> >
> > On Tue, Nov 17, 2020 at 2:01 PM Namhyung Kim  wrote:
> >>
> >> Hello,
> >>
> >> On Thu, Nov 12, 2020 at 4:54 AM Liang, Kan  
> >> wrote:
> >> >
> >> >
> >> >
> >> > On 11/11/2020 11:25 AM, Peter Zijlstra wrote:
> >> > > On Mon, Nov 09, 2020 at 09:49:31AM -0500, Liang, Kan wrote:
> >> > >
> >> > >> - When the large PEBS was introduced (9c964efa4330), the sched_task() 
> >> > >> should
> >> > >> be invoked to flush the PEBS buffer in each context switch. However, 
> >> > >> The
> >> > >> perf_sched_events in account_event() is not updated accordingly. The
> >> > >> perf_event_task_sched_* never be invoked for a pure per-CPU context. 
> >> > >> Only
> >> > >> per-task event works.
> >> > >> At that time, the perf_pmu_sched_task() is outside of
> >> > >> perf_event_context_sched_in/out. It means that perf has to double
> >> > >> perf_pmu_disable() for per-task event.
> >> > >
> >> > >> - The patch 1 tries to fix broken per-CPU events. The CPU context 
> >> > >> cannot be
> >> > >> retrieved from the task->perf_event_ctxp. So it has to be tracked in 
> >> > >> the
> >> > >> sched_cb_list. Yes, the code is very similar to the original codes, 
> >> > >> but it
> >> > >> is actually the new code for per-CPU events. The optimization for 
> >> > >> per-task
> >> > >> events is still kept.
> >> > >>For the case, which has both a CPU context and a task context, 
> >> > >> yes, the
> >> > >> __perf_pmu_sched_task() in this patch is not invoked. Because the
> >> > >> sched_task() only need to be invoked once in a context switch. The
> >> > >> sched_task() will be eventually invoked in the task context.
> >> > >
> >> > > The thing is; your first two patches rely on PERF_ATTACH_SCHED_CB and
> >> > > only set that for large pebs. Are you sure the other users (Intel LBR
> >> > > and PowerPC BHRB) don't need it?
> >> >
> >> > I didn't set it for LBR, because the perf_sched_events is always enabled
> >> > for LBR. But, yes, we should explicitly set the PERF_ATTACH_SCHED_CB
> >> > for LBR.
> >> >
> >> > if (has_branch_stack(event))
> >> > inc = true;
> >> >
> >> > >
> >> > > If they indeed do not require the pmu::sched_task() callback for CPU
> >> > > events, then I still think the whole perf_sched_cb_{inc,dec}() 
> >> > > interface
> >> >
> >> > No, LBR requires the pmu::sched_task() callback for CPU events.
> >> >
> >> > Now, The LBR registers have to be reset in sched in even for CPU events.
> >> >
> >> > To fix the shorter LBR callstack issue for CPU events, we also need to
> >> > save/restore LBRs in pmu::sched_task().
> >> > https://lore.kernel.org/lkml/1578495789-95006-4-git-send-email-kan.li...@linux.intel.com/
> >> >
> >> > > is confusing at best.
> >> > >
> >> > > Can't we do something like this instead?
> >> > >
> >> > I think the below patch may have two issues.
> >> > - PERF_ATTACH_SCHED_CB is required for LBR (maybe PowerPC BHRB as well) 
> >> > now.
> >> > - We may disable the large PEBS later if not all PEBS events support
> >> > large PEBS. The PMU need a way to notify the generic code to decrease
> >> > the nr_sched_task.
> >>
> >> Any updates on this?  I've reviewed and tested Kan's patches
> >> and they all look good.
> >>
> >> Maybe we can talk to PPC folks to confirm the BHRB case?
> >
> > Can we move this forward?  I saw patch 3/3 also adds PERF_ATTACH_SCHED_CB
> > for PowerPC too.  But it'd be nice if ppc folks can confirm the change.
>
> Sorry I've read the whole thread, but I'm still not entirely sure I
> understand the question.

Thanks for your time and sorry about not being clear enough.

We found per-cpu events are not calling pmu::sched_task()
on context switches.  So PERF_ATTACH_SCHED_CB was
added to indicate the core logic that it needs to invoke the
callback.

The patch 3/3 added the flag to PPC (for BHRB) with other
changes (I think it should be split like in the patch 2/3) and
want to get ACKs from the PPC folks.

Thanks,
Namhyung


Re: [PATCH 1/3] ibmvfc: byte swap login_buf.resp values in attribute show functions

2020-11-23 Thread Martin K. Petersen
On Tue, 17 Nov 2020 12:50:29 -0600, Tyrel Datwyler wrote:

> Both ibmvfc_show_host_(capabilities|npiv_version) functions retrieve
> values from vhost->login_buf.resp buffer. This is the MAD response
> buffer from the VIOS and as such any multi-byte non-string values are in
> big endian format.
> 
> Byte swap these values to host cpu endian format for better human
> readability.

Applied to 5.11/scsi-queue, thanks!

[1/3] scsi: ibmvfc: Byte swap login_buf.resp values in attribute show functions
  https://git.kernel.org/mkp/scsi/c/61bdb4eec8d1
[2/3] scsi: ibmvfc: Remove trailing semicolon
  https://git.kernel.org/mkp/scsi/c/4e0716199ab6
[3/3] scsi: ibmvfc: Use correlation token to tag commands
  https://git.kernel.org/mkp/scsi/c/2aa0102c6688

-- 
Martin K. Petersen  Oracle Linux Engineering


Re: [PATCH v3 3/3] powerpc/64s: feature: Work around inline asm issues

2020-11-23 Thread Michael Ellerman
Bill Wendling  writes:
> On Mon, Nov 23, 2020 at 12:10 PM Segher Boessenkool
>  wrote:
>> On Mon, Nov 23, 2020 at 12:01:01PM -0800, Bill Wendling wrote:
>> > On Mon, Nov 23, 2020 at 11:58 AM Segher Boessenkool
>> >  wrote:
>> > > > On Sun, Nov 22, 2020 at 10:36 PM Segher Boessenkool
>> > > >  wrote:
>> > > > > "true" (as a result of a comparison) in as is -1, not 1.
>> > >
>> > > On Mon, Nov 23, 2020 at 11:43:11AM -0800, Bill Wendling wrote:
>> > > > What Segher said. :-) Also, if you reverse the comparison, you'll get
>> > > > a build error.
>> > >
>> > > But that means your patch is the wrong way around?
>> > >
>> > > -   .ifgt (label##4b- label##3b)-(label##2b- label##1b);\
>> > > -   .error "Feature section else case larger than body";\
>> > > -   .endif; \
>> > > +   .org . - ((label##4b-label##3b) > (label##2b-label##1b)); \
>> > >
>> > > It should be a + in that last line, not a -.
>> >
>> > I said so in a follow up email.
>>
>> Yeah, and that arrived a second after I pressed "send" :-)
>>
> Michael, I apologize for the churn with these patches. I believe the
> policy is to resend the match as "v4", correct?
>
> I ran tests with the change above. It compiled with no error. If I
> switch the labels around to ".org . + ((label##2b-label##1b) >
> (label##4b-label##3b))", then it fails as expected.

I wanted to retain the nicer error reporting for gcc builds, so I did it
like this:

diff --git a/arch/powerpc/include/asm/feature-fixups.h 
b/arch/powerpc/include/asm/feature-fixups.h
index b0af97add751..c4ad33074df5 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -36,6 +36,24 @@ label##2:\
.align 2;   \
 label##3:
 
+
+#ifndef CONFIG_CC_IS_CLANG
+#define CHECK_ALT_SIZE(else_size, body_size)   \
+   .ifgt (else_size) - (body_size);\
+   .error "Feature section else case larger than body";\
+   .endif;
+#else
+/*
+ * If we use the ifgt syntax above, clang's assembler complains about the
+ * expression being non-absolute when the code appears in an inline assembly
+ * statement.
+ * As a workaround use an .org directive that has no effect if the else case
+ * instructions are smaller than the body, but fails otherwise.
+ */
+#define CHECK_ALT_SIZE(else_size, body_size)   \
+   .org . + ((else_size) > (body_size));
+#endif
+
 #define MAKE_FTR_SECTION_ENTRY(msk, val, label, sect)  \
 label##4:  \
.popsection;\
@@ -48,9 +66,7 @@ label##5: 
\
FTR_ENTRY_OFFSET label##2b-label##5b;   \
FTR_ENTRY_OFFSET label##3b-label##5b;   \
FTR_ENTRY_OFFSET label##4b-label##5b;   \
-   .ifgt (label##4b- label##3b)-(label##2b- label##1b);\
-   .error "Feature section else case larger than body";\
-   .endif; \
+   CHECK_ALT_SIZE((label##4b-label##3b), (label##2b-label##1b)); \
.popsection;
 
 

I've pushed a branch with all your patches applied to:

  https://github.com/linuxppc/linux/commits/next-test


Are you able to give that a quick test? It builds clean with clang for
me, but we must be using different versions of clang because my branch
already builds clean for me even without your patches.

cheers


[PATCH V2] powerpc/perf: Fix crash with is_sier_available when pmu is not set

2020-11-23 Thread Athira Rajeev
On systems without any specific PMU driver support registered, running
'perf record' with --intr-regs will crash (perf record -I).

The relevant portion from crash logs and Call Trace:

Unable to handle kernel paging request for data at address 0x0068
Faulting instruction address: 0xc013eb18
Oops: Kernel access of bad area, sig: 11 [#1]
CPU: 2 PID: 13435 Comm: kill Kdump: loaded Not tainted 4.18.0-193.el8.ppc64le #1
NIP:  c013eb18 LR: c0139f2c CTR: c0393d80
REGS: c004a07ab4f0 TRAP: 0300   Not tainted  (4.18.0-193.el8.ppc64le)
NIP [c013eb18] is_sier_available+0x18/0x30
LR [c0139f2c] perf_reg_value+0x6c/0xb0
Call Trace:
[c004a07ab770] [c004a07ab7c8] 0xc004a07ab7c8 (unreliable)
[c004a07ab7a0] [c03aa77c] perf_output_sample+0x60c/0xac0
[c004a07ab840] [c03ab3f0] perf_event_output_forward+0x70/0xb0
[c004a07ab8c0] [c039e208] __perf_event_overflow+0x88/0x1a0
[c004a07ab910] [c039e42c] perf_swevent_hrtimer+0x10c/0x1d0
[c004a07abc50] [c0228b9c] __hrtimer_run_queues+0x17c/0x480
[c004a07abcf0] [c022aaf4] hrtimer_interrupt+0x144/0x520
[c004a07abdd0] [c002a864] timer_interrupt+0x104/0x2f0
[c004a07abe30] [c00091c4] decrementer_common+0x114/0x120

When a perf record session is started with the "-I" option, capturing
registers on each sample calls is_sier_available() to check for the
availability of the SIER (Sample Instruction Event Register) on the platform.
This function in core-book3s accesses 'ppmu->flags'. If a platform specific
PMU driver is not registered, ppmu is set to NULL and accessing its
members results in a crash. Fix the crash by returning false in
is_sier_available() if ppmu is not set.

Fixes: 333804dc3b7a ("powerpc/perf: Update perf_regs structure to include SIER")
Reported-by: Sachin Sant 
Signed-off-by: Athira Rajeev 
---
Changes in v2:
- Corrected the commit message as suggested by
  Michael Ellerman.

 arch/powerpc/perf/core-book3s.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 08643cb..1de4770 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -137,6 +137,9 @@ static void pmao_restore_workaround(bool ebb) { }
 
 bool is_sier_available(void)
 {
+   if (!ppmu)
+   return false;
+
if (ppmu->flags & PPMU_HAS_SIER)
return true;
 
-- 
1.8.3.1



[powerpc:fixes-test] BUILD SUCCESS b6b79dd53082db11070b4368d85dd6699ff0b063

2020-11-23 Thread kernel test robot
 haps_hs_smp_defconfig
mips decstation_defconfig
m68k allmodconfig
armxcep_defconfig
powerpc kilauea_defconfig
powerpc mpc8540_ads_defconfig
armmagician_defconfig
mips   lemote2f_defconfig
arc defconfig
sh  urquell_defconfig
sh  sdk7780_defconfig
alpha   defconfig
sh ecovec24_defconfig
powerpc  arches_defconfig
powerpc   currituck_defconfig
arm  pxa168_defconfig
arm   omap2plus_defconfig
sh  rsk7269_defconfig
powerpc  katmai_defconfig
arm  exynos_defconfig
powerpc pq2fads_defconfig
powerpcsocrates_defconfig
mips db1xxx_defconfig
arm   corgi_defconfig
powerpc   ppc64_defconfig
powerpc taishan_defconfig
mips   ci20_defconfig
arc nsimosci_hs_defconfig
xtensa  audio_kc705_defconfig
mips   rbtx49xx_defconfig
armmps2_defconfig
c6xevmc6474_defconfig
armspear3xx_defconfig
xtensageneric_kc705_defconfig
mips  rb532_defconfig
powerpc canyonlands_defconfig
ia64 allmodconfig
ia64 allyesconfig
m68kdefconfig
m68k allyesconfig
nios2   defconfig
arc  allyesconfig
c6x  allyesconfig
nds32   defconfig
nios2allyesconfig
cskydefconfig
alphaallyesconfig
xtensa   allyesconfig
h8300allyesconfig
sh   allmodconfig
parisc  defconfig
s390 allyesconfig
parisc   allyesconfig
s390defconfig
i386 allyesconfig
sparcallyesconfig
sparc   defconfig
i386defconfig
mips allmodconfig
powerpc  allyesconfig
powerpc  allmodconfig
i386 randconfig-a004-20201123
i386 randconfig-a003-20201123
i386 randconfig-a002-20201123
i386 randconfig-a005-20201123
i386 randconfig-a001-20201123
i386 randconfig-a006-20201123
x86_64   randconfig-a015-20201123
x86_64   randconfig-a011-20201123
x86_64   randconfig-a014-20201123
x86_64   randconfig-a016-20201123
x86_64   randconfig-a012-20201123
x86_64   randconfig-a013-20201123
i386 randconfig-a012-20201123
i386 randconfig-a013-20201123
i386 randconfig-a011-20201123
i386 randconfig-a016-20201123
i386 randconfig-a014-20201123
i386 randconfig-a015-20201123
riscvnommu_k210_defconfig
riscvallyesconfig
riscvnommu_virt_defconfig
riscv allnoconfig
riscv   defconfig
riscv  rv32_defconfig
riscvallmodconfig
x86_64   rhel
x86_64   allyesconfig
x86_64rhel-7.6-kselftests
x86_64  defconfig
x86_64   rhel-8.3
x86_64  kexec

clang tested configs:
x86_64   randconfig-a006-20201123
x86_64   randconfig-a003-20201123
x86_64   randconfig-a004-20201123
x86_64   randconfig-a005-20201123
x86_64   randconfig-a002-20201123
x86_64   randconfig-a001-20201123

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


[powerpc:next-test] BUILD SUCCESS c250581fcf84c34cbaf5b535512b60a5e96970f6

2020-11-23 Thread kernel test robot
  rv32_defconfig
mips decstation_defconfig
armmvebu_v5_defconfig
powerpc  pcm030_defconfig
armmagician_defconfig
umkunit_defconfig
powerpc ppa8548_defconfig
mips   lemote2f_defconfig
mipsar7_defconfig
sparc   sparc64_defconfig
arc defconfig
sh  urquell_defconfig
sh ecovec24_defconfig
powerpc pq2fads_defconfig
powerpcsocrates_defconfig
arm   corgi_defconfig
powerpc   ppc64_defconfig
powerpc taishan_defconfig
mips   ci20_defconfig
arc nsimosci_hs_defconfig
xtensa  audio_kc705_defconfig
mips   rbtx49xx_defconfig
armmps2_defconfig
c6xevmc6474_defconfig
armspear3xx_defconfig
mips  rb532_defconfig
ia64 allmodconfig
ia64defconfig
ia64 allyesconfig
m68k allmodconfig
m68kdefconfig
m68k allyesconfig
nios2   defconfig
arc  allyesconfig
c6x  allyesconfig
nds32   defconfig
nios2allyesconfig
cskydefconfig
alphaallyesconfig
xtensa   allyesconfig
h8300allyesconfig
sh   allmodconfig
parisc  defconfig
s390 allyesconfig
parisc   allyesconfig
i386 allyesconfig
sparcallyesconfig
sparc   defconfig
i386defconfig
mips allmodconfig
powerpc  allyesconfig
powerpc  allmodconfig
x86_64   randconfig-a006-20201120
x86_64   randconfig-a003-20201120
x86_64   randconfig-a004-20201120
x86_64   randconfig-a005-20201120
x86_64   randconfig-a001-20201120
x86_64   randconfig-a002-20201120
i386 randconfig-a004-20201123
i386 randconfig-a003-20201123
i386 randconfig-a002-20201123
i386 randconfig-a005-20201123
i386 randconfig-a001-20201123
i386 randconfig-a006-20201123
i386 randconfig-a004-20201120
i386 randconfig-a003-20201120
i386 randconfig-a002-20201120
i386 randconfig-a005-20201120
i386 randconfig-a001-20201120
i386 randconfig-a006-20201120
x86_64   randconfig-a015-20201123
x86_64   randconfig-a011-20201123
x86_64   randconfig-a014-20201123
x86_64   randconfig-a016-20201123
x86_64   randconfig-a012-20201123
x86_64   randconfig-a013-20201123
i386 randconfig-a012-20201120
i386 randconfig-a013-20201120
i386 randconfig-a011-20201120
i386 randconfig-a016-20201120
i386 randconfig-a014-20201120
i386 randconfig-a015-20201120
i386 randconfig-a012-20201123
i386 randconfig-a013-20201123
i386 randconfig-a011-20201123
i386 randconfig-a016-20201123
i386 randconfig-a014-20201123
i386 randconfig-a015-20201123
riscvnommu_k210_defconfig
riscvallyesconfig
riscvnommu_virt_defconfig
riscv allnoconfig
riscv   defconfig
riscvallmodconfig
x86_64   rhel
x86_64   allyesconfig
x86_64rhel-7.6-kselftests
x86_64  defconfig
x86_64   rhel-8.3
x86_64  kexec

clang tested configs:
x86_64   randconfig-a006-20201123
x86_64   randconfig-a003-20201123
x86_64   randconfig-a004-20201123
x86_64   randconfig-a005-20201123
x86_64   randconfig-a002-20201123
x86_64   randconfig-a001-20201123
x86_64   randconfig-a015-20201120
x86_64   randconfig-a011-20201120
x86_64

[powerpc:merge] BUILD SUCCESS 7c94b5d4e9d328a69d43a11d7e3dfd7a6d762cb6

2020-11-23 Thread kernel test robot
 allyesconfig
sparcallyesconfig
sparc   defconfig
i386defconfig
mips allmodconfig
powerpc  allyesconfig
powerpc  allmodconfig
powerpc   allnoconfig
i386 randconfig-a004-20201123
i386 randconfig-a003-20201123
i386 randconfig-a002-20201123
i386 randconfig-a005-20201123
i386 randconfig-a001-20201123
i386 randconfig-a006-20201123
x86_64   randconfig-a015-20201123
x86_64   randconfig-a011-20201123
x86_64   randconfig-a014-20201123
x86_64   randconfig-a016-20201123
x86_64   randconfig-a012-20201123
x86_64   randconfig-a013-20201123
i386 randconfig-a012-20201123
i386 randconfig-a013-20201123
i386 randconfig-a011-20201123
i386 randconfig-a016-20201123
i386 randconfig-a014-20201123
i386 randconfig-a015-20201123
riscvnommu_k210_defconfig
riscvallyesconfig
riscvnommu_virt_defconfig
riscv allnoconfig
riscv   defconfig
riscv  rv32_defconfig
riscvallmodconfig
x86_64   rhel
x86_64   allyesconfig
x86_64rhel-7.6-kselftests
x86_64  defconfig
x86_64   rhel-8.3
x86_64  kexec

clang tested configs:
x86_64   randconfig-a006-20201123
x86_64   randconfig-a003-20201123
x86_64   randconfig-a004-20201123
x86_64   randconfig-a005-20201123
x86_64   randconfig-a002-20201123
x86_64   randconfig-a001-20201123

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


Re: [PATCH v3] soc: fsl: dpio: Get the cpumask through cpumask_of(cpu)

2020-11-23 Thread Li Yang
On Mon, Oct 19, 2020 at 9:15 PM Yi Wang  wrote:
>
> From: Hao Si 
>
> The local variable 'cpumask_t mask' is in the stack memory, and its address
> is assigned to 'desc->affinity' in 'irq_set_affinity_hint()'.
> But the memory area where this variable is located is at risk of being
> modified.
>
> During LTP testing, the following error was generated:
>
> Unable to handle kernel paging request at virtual address 12e9b790
> Mem abort info:
>   ESR = 0x9607
>   Exception class = DABT (current EL), IL = 32 bits
>   SET = 0, FnV = 0
>   EA = 0, S1PTW = 0
> Data abort info:
>   ISV = 0, ISS = 0x0007
>   CM = 0, WnR = 0
> swapper pgtable: 4k pages, 48-bit VAs, pgdp = 75ac5e07
> [12e9b790] pgd=0027dbffe003, pud=0027dbffd003,
> pmd=0027b6d61003, pte=
> Internal error: Oops: 9607 [#1] PREEMPT SMP
> Modules linked in: xt_conntrack
> Process read_all (pid: 20171, stack limit = 0x44ea4095)
> CPU: 14 PID: 20171 Comm: read_all Tainted: GB   W
> Hardware name: NXP Layerscape LX2160ARDB (DT)
> pstate: 8085 (Nzcv daIf -PAN -UAO)
> pc : irq_affinity_hint_proc_show+0x54/0xb0
> lr : irq_affinity_hint_proc_show+0x4c/0xb0
> sp : 1138bc10
> x29: 1138bc10 x28: d131d1e0
> x27: 007000c0 x26: 8025b9480dc0
> x25: 8025b9480da8 x24: 03ff
> x23: 8027334f8300 x22: 80272e97d000
> x21: 80272e97d0b0 x20: 8025b9480d80
> x19: 09a49000 x18: 
> x17:  x16: 
> x15:  x14: 
> x13:  x12: 0040
> x11:  x10: 802735b79b88
> x9 :  x8 : 
> x7 : 09a49848 x6 : 0003
> x5 :  x4 : 08157d6c
> x3 : 1138bc10 x2 : 12e9b790
> x1 :  x0 : 
> Call trace:
>  irq_affinity_hint_proc_show+0x54/0xb0
>  seq_read+0x1b0/0x440
>  proc_reg_read+0x80/0xd8
>  __vfs_read+0x60/0x178
>  vfs_read+0x94/0x150
>  ksys_read+0x74/0xf0
>  __arm64_sys_read+0x24/0x30
>  el0_svc_common.constprop.0+0xd8/0x1a0
>  el0_svc_handler+0x34/0x88
>  el0_svc+0x10/0x14
> Code: f9001bbf 943e0732 f94066c2 b462 (f9400041)
> ---[ end trace b495bdcb0b3b732b ]---
> Kernel panic - not syncing: Fatal exception
> SMP: stopping secondary CPUs
> SMP: failed to stop secondary CPUs 0,2-4,6,8,11,13-15
> Kernel Offset: disabled
> CPU features: 0x0,21006008
> Memory Limit: none
> ---[ end Kernel panic - not syncing: Fatal exception ]---
>
> Fix it by using 'cpumask_of(cpu)' to get the cpumask.
>
> Signed-off-by: Hao Si 
> Signed-off-by: Lin Chen 
> Signed-off-by: Yi Wang 

Applied for fix.  Thanks.

> ---
> v3: Use cpumask_of(cpu) to get the pre-defined cpumask in the static
> cpu_bit_bitmap array.
> v2: Place 'cpumask_t mask' in the driver's private data and while at it,
> rename it to cpu_mask.
>
>  drivers/soc/fsl/dpio/dpio-driver.c | 5 +
>  1 file changed, 1 insertion(+), 4 deletions(-)
>
> diff --git a/drivers/soc/fsl/dpio/dpio-driver.c 
> b/drivers/soc/fsl/dpio/dpio-driver.c
> index 7b642c3..7f397b4 100644
> --- a/drivers/soc/fsl/dpio/dpio-driver.c
> +++ b/drivers/soc/fsl/dpio/dpio-driver.c
> @@ -95,7 +95,6 @@ static int register_dpio_irq_handlers(struct fsl_mc_device 
> *dpio_dev, int cpu)
>  {
> int error;
> struct fsl_mc_device_irq *irq;
> -   cpumask_t mask;
>
> irq = dpio_dev->irqs[0];
> error = devm_request_irq(&dpio_dev->dev,
> @@ -112,9 +111,7 @@ static int register_dpio_irq_handlers(struct 
> fsl_mc_device *dpio_dev, int cpu)
> }
>
> /* set the affinity hint */
> -   cpumask_clear(&mask);
> -   cpumask_set_cpu(cpu, &mask);
> -   if (irq_set_affinity_hint(irq->msi_desc->irq, &mask))
> +   if (irq_set_affinity_hint(irq->msi_desc->irq, cpumask_of(cpu)))
> dev_err(&dpio_dev->dev,
> "irq_set_affinity failed irq %d cpu %d\n",
> irq->msi_desc->irq, cpu);
> --
> 2.15.2
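
For reference, the reason this is safe: cpumask_of(cpu) resolves to a pointer
into the static cpu_bit_bitmap array (as noted in the v3 changelog above), so
the pointer stored by irq_set_affinity_hint() stays valid, unlike a pointer to
a stack-local cpumask_t. A minimal before/after sketch, outside the real
driver context and with illustrative function names:

  #include <linux/cpumask.h>
  #include <linux/interrupt.h>

  /* Before: the hint points at a stack variable that is dead as soon as
   * this function returns, so later reads of the affinity hint walk
   * freed stack memory (the LTP crash above).
   */
  static void set_hint_stack(unsigned int irq, int cpu)
  {
          cpumask_t mask;                         /* lives on the stack */

          cpumask_clear(&mask);
          cpumask_set_cpu(cpu, &mask);
          irq_set_affinity_hint(irq, &mask);      /* dangling after return */
  }

  /* After: cpumask_of(cpu) points into the static cpu_bit_bitmap table,
   * so the stored pointer remains valid for the lifetime of the hint.
   */
  static void set_hint_static(unsigned int irq, int cpu)
  {
          irq_set_affinity_hint(irq, cpumask_of(cpu));
  }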


Re: [PATCH v3 2/2] powerpc/ptrace: Hard wire PT_SOFTE value to 1 in gpr_get() too

2020-11-23 Thread Michael Ellerman
Oleg Nesterov  writes:
> Christophe, et al,
>
> So what?
>
> Are you going to push your change or should I re-send 1-2 without
> whitespace cleanups?

I'll take your 1 & 2 and fixup the whitespace issues when applying.

cheers

> On 11/19, Oleg Nesterov wrote:
>>
>> On 11/19, Christophe Leroy wrote:
>> >
>> > I think the following should work, and not require the first patch (compile
>> > tested only).
>> >
>> > --- a/arch/powerpc/kernel/ptrace/ptrace-view.c
>> > +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c
>> > @@ -234,9 +234,21 @@ static int gpr_get(struct task_struct *target, const
>> > struct user_regset *regset,
>> >BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
>> > offsetof(struct pt_regs, msr) + sizeof(long));
>> > 
>> > +#ifdef CONFIG_PPC64
>> > +  membuf_write(&to, &target->thread.regs->orig_gpr3,
>> > +   offsetof(struct pt_regs, softe) - offsetof(struct pt_regs,
>> > orig_gpr3));
>> > +  membuf_store(&to, 1UL);
>> > +
>> > +  BUILD_BUG_ON(offsetof(struct pt_regs, trap) !=
>> > +   offsetof(struct pt_regs, softe) + sizeof(long));
>> > +
>> > +  membuf_write(&to, &target->thread.regs->trap,
>> > +   sizeof(struct user_pt_regs) - offsetof(struct pt_regs, 
>> > trap));
>> > +#else
>> >membuf_write(&to, &target->thread.regs->orig_gpr3,
>> >sizeof(struct user_pt_regs) -
>> >offsetof(struct pt_regs, orig_gpr3));
>> > +#endif
>> >return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) -
>> > sizeof(struct user_pt_regs));
>> >  }
>> 
>> Probably yes.
>> 
>> This mirrors the previous patch I sent 
>> (https://lore.kernel.org/lkml/20190917143753.ga12...@redhat.com/)
>> and this is exactly what I tried to avoid, we can make a simpler fix now.
>> 
>> But let me repeat, I agree with any fix even if IMO my version simplifies
>> the code, just
>> commit this change and let's forget this problem.
>> 
>> Oleg.


Re: [PATCH 25/25] soc: fsl: qbman: qman: Remove unused variable 'dequeue_wq'

2020-11-23 Thread Li Yang
Hi Roy,

On Tue, Nov 3, 2020 at 9:31 AM Lee Jones  wrote:
>
> Fixes the following W=1 kernel build warning(s):
>
>  drivers/soc/fsl/qbman/qman.c: In function ‘qman_shutdown_fq’:
>  drivers/soc/fsl/qbman/qman.c:2700:8: warning: variable ‘dequeue_wq’ set but 
> not used [-Wunused-but-set-variable]
>
> Cc: Li Yang 
> Cc: YueHaibing 
> Cc: linuxppc-dev@lists.ozlabs.org
> Signed-off-by: Lee Jones 
> ---
>  drivers/soc/fsl/qbman/qman.c | 8 ++--
>  1 file changed, 2 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c
> index 9888a70618730..62b182c3a8b04 100644
> --- a/drivers/soc/fsl/qbman/qman.c
> +++ b/drivers/soc/fsl/qbman/qman.c
> @@ -2622,7 +2622,7 @@ int qman_shutdown_fq(u32 fqid)
> union qm_mc_command *mcc;
> union qm_mc_result *mcr;
> int orl_empty, drain = 0, ret = 0;
> -   u32 channel, wq, res;
> +   u32 channel, res;
> u8 state;
>
> p = get_affine_portal();
> @@ -2655,7 +2655,7 @@ int qman_shutdown_fq(u32 fqid)
> DPAA_ASSERT((mcr->verb & QM_MCR_VERB_MASK) == QM_MCR_VERB_QUERYFQ);
> /* Need to store these since the MCR gets reused */
> channel = qm_fqd_get_chan(&mcr->queryfq.fqd);
> -   wq = qm_fqd_get_wq(&mcr->queryfq.fqd);
> +   qm_fqd_get_wq(&mcr->queryfq.fqd);

This probably is not needed either.

>
> if (channel < qm_channel_pool1) {
> channel_portal = get_portal_for_channel(channel);
> @@ -2697,7 +2697,6 @@ int qman_shutdown_fq(u32 fqid)
>  * to dequeue from the channel the FQ is scheduled on
>  */
> int found_fqrn = 0;
> -   u16 dequeue_wq = 0;
>
> /* Flag that we need to drain FQ */
> drain = 1;
> @@ -2705,11 +2704,8 @@ int qman_shutdown_fq(u32 fqid)
> if (channel >= qm_channel_pool1 &&
> channel < qm_channel_pool1 + 15) {
> /* Pool channel, enable the bit in the portal 
> */
> -   dequeue_wq = (channel -
> - qm_channel_pool1 + 1)<<4 | wq;
> } else if (channel < qm_channel_pool1) {
> /* Dedicated channel */
> -   dequeue_wq = wq;

With these gone, these if statements seem to be redundant.  Can you
propose an additional patch to further clean up the code here?  Thanks.
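
One possible shape for that follow-up cleanup, assuming the only work left in
the first two branches was computing dequeue_wq (sketch only, not a tested
patch; the helper name and the explicit qm_channel_pool1 parameter are just
for illustration):

  /* A dedicated channel (below qm_channel_pool1) or one of the 15 pool
   * channels can be drained; anything else is the error case that the
   * existing else branch reports with dev_err().
   */
  static bool qman_shutdown_channel_ok(u32 channel, u32 qm_channel_pool1)
  {
          return channel < qm_channel_pool1 + 15;
  }

With that, the if/else-if/else in qman_shutdown_fq() collapses to a single
"not recoverable" check around the existing dev_err() path.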

> } else {
> dev_err(dev, "Can't recover FQ 0x%x, ch: 
> 0x%x",
> fqid, channel);
> --
> 2.25.1
>


Re: [PATCH 00/25] Rid W=1 warnings in SoC

2020-11-23 Thread Li Yang
On Tue, Nov 3, 2020 at 9:29 AM Lee Jones  wrote:
>
> This set is part of a larger effort attempting to clean-up W=1
> kernel builds, which are currently overwhelmingly riddled with
> niggly little warnings.
>
> Lee Jones (25):

>   soc: fsl: dpio: qbman-portal: Fix a bunch of kernel-doc misdemeanours
>   soc: fsl: qe: qe_common: Fix misnamed function attribute 'addr'
>   soc: fsl: qbman: qman: Remove unused variable 'dequeue_wq'

The above are applied for next.  Thanks.

Regards,
Leo
>
>  drivers/soc/bcm/brcmstb/pm/pm-arm.c  |  2 +
>  drivers/soc/fsl/dpio/qbman-portal.c  | 18 +--
>  drivers/soc/fsl/qbman/qman.c |  8 +--
>  drivers/soc/fsl/qe/qe_common.c   |  2 +-
>  drivers/soc/qcom/kryo-l2-accessors.c |  2 +-
>  drivers/soc/qcom/llcc-qcom.c |  2 +-
>  drivers/soc/qcom/qcom-geni-se.c  |  5 +-
>  drivers/soc/qcom/qcom_aoss.c |  4 +-
>  drivers/soc/qcom/rpmh.c  |  2 +-
>  drivers/soc/qcom/rpmhpd.c|  3 ++
>  drivers/soc/qcom/smem.c  |  3 +-
>  drivers/soc/qcom/smp2p.c |  3 +-
>  drivers/soc/qcom/smsm.c  |  4 +-
>  drivers/soc/qcom/wcnss_ctrl.c|  8 +--
>  drivers/soc/rockchip/io-domain.c |  3 --
>  drivers/soc/samsung/s3c-pm-check.c   |  2 +-
>  drivers/soc/tegra/fuse/speedo-tegra124.c |  7 ++-
>  drivers/soc/tegra/fuse/speedo-tegra210.c |  8 +--
>  drivers/soc/ti/k3-ringacc.c  |  1 +
>  drivers/soc/ti/knav_dma.c|  2 +-
>  drivers/soc/ti/knav_qmss_queue.c | 62 
>  drivers/soc/ti/pm33xx.c  |  4 +-
>  drivers/soc/ti/wkup_m3_ipc.c |  8 ++-
>  23 files changed, 86 insertions(+), 77 deletions(-)
>
> Cc: act 
> Cc: Andy Gross 
> Cc: bcm-kernel-feedback-l...@broadcom.com
> Cc: Ben Dooks 
> Cc: Bjorn Andersson 
> Cc: Cyril Chemparathy 
> Cc: Dan Malek 
> Cc: Dave Gerlach 
> Cc: Doug Anderson 
> Cc: Florian Fainelli 
> Cc: Heiko Stuebner 
> Cc: Jonathan Hunter 
> Cc: Krzysztof Kozlowski 
> Cc: Liam Girdwood 
> Cc: linux-arm-...@vger.kernel.org
> Cc: linuxppc-dev@lists.ozlabs.org
> Cc: linux-rockc...@lists.infradead.org
> Cc: linux-samsung-...@vger.kernel.org
> Cc: linux-te...@vger.kernel.org
> Cc: Li Yang 
> Cc: Mark Brown 
> Cc: Qiang Zhao 
> Cc: "Rafael J. Wysocki" 
> Cc: Roy Pledge 
> Cc: Sandeep Nair 
> Cc: Santosh Shilimkar 
> Cc: Scott Wood 
> Cc: "Software, Inc" 
> Cc: Thierry Reding 
> Cc: Vitaly Bordug 
> Cc: YueHaibing 
>
> --
> 2.25.1
>


Re: linux-next: build failure in Linus' tree

2020-11-23 Thread Daniel Axtens
Thanks sfr and mpe.

> Applied to powerpc/fixes.
>
> [1/1] powerpc/64s: Fix allnoconfig build since uaccess flush
>   
> https://git.kernel.org/powerpc/c/b6b79dd53082db11070b4368d85dd6699ff0b063

We also needed a similar fix for stable, which has also been applied.

I guess I should set up some sort of build process that tests a whole
range of configs. I did test a few but clearly not enough. Is there a
known list that I should be using? Something from kisskb?

Kind regards,
Daniel

Michael Ellerman  writes:

> On Mon, 23 Nov 2020 18:40:16 +1100, Stephen Rothwell wrote:
>> After merging most of the trees, today's linux-next build (powerpc64
>> allnoconfig) failed like this:
>> 
>> In file included from arch/powerpc/include/asm/kup.h:18,
>>  from arch/powerpc/include/asm/uaccess.h:9,
>>  from include/linux/uaccess.h:11,
>>  from include/linux/sched/task.h:11,
>>  from include/linux/sched/signal.h:9,
>>  from include/linux/rcuwait.h:6,
>>  from include/linux/percpu-rwsem.h:7,
>>  from include/linux/fs.h:33,
>>  from include/linux/compat.h:17,
>>  from arch/powerpc/kernel/asm-offsets.c:14:
>> arch/powerpc/include/asm/book3s/64/kup-radix.h:66:1: warning: data 
>> definition has no type or storage class
>>66 | DECLARE_STATIC_KEY_FALSE(uaccess_flush_key);
>>   | ^~~~
>> arch/powerpc/include/asm/book3s/64/kup-radix.h:66:1: error: type defaults to 
>> 'int' in declaration of 'DECLARE_STATIC_KEY_FALSE' [-Werror=implicit-int]
>> arch/powerpc/include/asm/book3s/64/kup-radix.h:66:1: warning: parameter 
>> names (without types) in function declaration
>> arch/powerpc/include/asm/book3s/64/kup-radix.h: In function 
>> 'prevent_user_access':
>> arch/powerpc/include/asm/book3s/64/kup-radix.h:180:6: error: implicit 
>> declaration of function 'static_branch_unlikely' 
>> [-Werror=implicit-function-declaration]
>>   180 |  if (static_branch_unlikely(&uaccess_flush_key))
>>   |  ^~
>> arch/powerpc/include/asm/book3s/64/kup-radix.h:180:30: error: 
>> 'uaccess_flush_key' undeclared (first use in this function)
>>   180 |  if (static_branch_unlikely(&uaccess_flush_key))
>>   |  ^
>> arch/powerpc/include/asm/book3s/64/kup-radix.h:180:30: note: each undeclared 
>> identifier is reported only once for each function it appears in
>> arch/powerpc/include/asm/book3s/64/kup-radix.h: In function 
>> 'prevent_user_access_return':
>> arch/powerpc/include/asm/book3s/64/kup-radix.h:189:30: error: 
>> 'uaccess_flush_key' undeclared (first use in this function)
>>   189 |  if (static_branch_unlikely(&uaccess_flush_key))
>>   |  ^
>> arch/powerpc/include/asm/book3s/64/kup-radix.h: In function 
>> 'restore_user_access':
>> arch/powerpc/include/asm/book3s/64/kup-radix.h:198:30: error: 
>> 'uaccess_flush_key' undeclared (first use in this function)
>>   198 |  if (static_branch_unlikely(&uaccess_flush_key) && flags == 
>> AMR_KUAP_BLOCKED)
>>   |  ^
>> 
>> [...]
>


Re: linux-next: build failure in Linus' tree

2020-11-23 Thread Michael Ellerman
On Mon, 23 Nov 2020 18:40:16 +1100, Stephen Rothwell wrote:
> After merging most of the trees, today's linux-next build (powerpc64
> allnoconfig) failed like this:
> 
> In file included from arch/powerpc/include/asm/kup.h:18,
>  from arch/powerpc/include/asm/uaccess.h:9,
>  from include/linux/uaccess.h:11,
>  from include/linux/sched/task.h:11,
>  from include/linux/sched/signal.h:9,
>  from include/linux/rcuwait.h:6,
>  from include/linux/percpu-rwsem.h:7,
>  from include/linux/fs.h:33,
>  from include/linux/compat.h:17,
>  from arch/powerpc/kernel/asm-offsets.c:14:
> arch/powerpc/include/asm/book3s/64/kup-radix.h:66:1: warning: data definition 
> has no type or storage class
>66 | DECLARE_STATIC_KEY_FALSE(uaccess_flush_key);
>   | ^~~~
> arch/powerpc/include/asm/book3s/64/kup-radix.h:66:1: error: type defaults to 
> 'int' in declaration of 'DECLARE_STATIC_KEY_FALSE' [-Werror=implicit-int]
> arch/powerpc/include/asm/book3s/64/kup-radix.h:66:1: warning: parameter names 
> (without types) in function declaration
> arch/powerpc/include/asm/book3s/64/kup-radix.h: In function 
> 'prevent_user_access':
> arch/powerpc/include/asm/book3s/64/kup-radix.h:180:6: error: implicit 
> declaration of function 'static_branch_unlikely' 
> [-Werror=implicit-function-declaration]
>   180 |  if (static_branch_unlikely(&uaccess_flush_key))
>   |  ^~
> arch/powerpc/include/asm/book3s/64/kup-radix.h:180:30: error: 
> 'uaccess_flush_key' undeclared (first use in this function)
>   180 |  if (static_branch_unlikely(&uaccess_flush_key))
>   |  ^
> arch/powerpc/include/asm/book3s/64/kup-radix.h:180:30: note: each undeclared 
> identifier is reported only once for each function it appears in
> arch/powerpc/include/asm/book3s/64/kup-radix.h: In function 
> 'prevent_user_access_return':
> arch/powerpc/include/asm/book3s/64/kup-radix.h:189:30: error: 
> 'uaccess_flush_key' undeclared (first use in this function)
>   189 |  if (static_branch_unlikely(&uaccess_flush_key))
>   |  ^
> arch/powerpc/include/asm/book3s/64/kup-radix.h: In function 
> 'restore_user_access':
> arch/powerpc/include/asm/book3s/64/kup-radix.h:198:30: error: 
> 'uaccess_flush_key' undeclared (first use in this function)
>   198 |  if (static_branch_unlikely(&uaccess_flush_key) && flags == 
> AMR_KUAP_BLOCKED)
>   |  ^
> 
> [...]

Applied to powerpc/fixes.

[1/1] powerpc/64s: Fix allnoconfig build since uaccess flush
  https://git.kernel.org/powerpc/c/b6b79dd53082db11070b4368d85dd6699ff0b063

cheers
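
For anyone else tripping over this on odd configs: all of the errors above are
the usual signature of kup-radix.h using the static-key machinery
(DECLARE_STATIC_KEY_FALSE, static_branch_unlikely) without its declarations
being visible, which only shows up when nothing else in the include chain
pulls them in, as with allnoconfig. The usual cure is for the header to
include what it uses; a sketch of that kind of change, with the caveat that
the authoritative fix is the commit linked above and may differ in detail:

  /* arch/powerpc/include/asm/book3s/64/kup-radix.h (sketch only) */
  #include <linux/jump_label.h>   /* DECLARE_STATIC_KEY_FALSE,
                                   * static_branch_unlikely */

  DECLARE_STATIC_KEY_FALSE(uaccess_flush_key);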


Re: [PATCH v3 3/3] powerpc/64s: feature: Work around inline asm issues

2020-11-23 Thread Segher Boessenkool
On Mon, Nov 23, 2020 at 12:01:01PM -0800, Bill Wendling wrote:
> On Mon, Nov 23, 2020 at 11:58 AM Segher Boessenkool
>  wrote:
> > > On Sun, Nov 22, 2020 at 10:36 PM Segher Boessenkool
> > >  wrote:
> > > > "true" (as a result of a comparison) in as is -1, not 1.
> >
> > On Mon, Nov 23, 2020 at 11:43:11AM -0800, Bill Wendling wrote:
> > > What Segher said. :-) Also, if you reverse the comparison, you'll get
> > > a build error.
> >
> > But that means your patch is the wrong way around?
> >
> > -   .ifgt (label##4b- label##3b)-(label##2b- label##1b);\
> > -   .error "Feature section else case larger than body";\
> > -   .endif; \
> > +   .org . - ((label##4b-label##3b) > (label##2b-label##1b)); \
> >
> > It should be a + in that last line, not a -.
> 
> I said so in a follow up email.

Yeah, and that arrived a second after I pressed "send" :-)

> > Was this tested?
> >
> Please don't be insulting. Anyone can make an error.

Absolutely, but it is just a question.  It seems you could improve that
testing!  It helps you yourself most of all ;-)


Segher


Re: [PATCH v3 3/3] powerpc/64s: feature: Work around inline asm issues

2020-11-23 Thread Segher Boessenkool
(Please don't top-post.)

> On Sun, Nov 22, 2020 at 10:36 PM Segher Boessenkool
>  wrote:
> > "true" (as a result of a comparison) in as is -1, not 1.

On Mon, Nov 23, 2020 at 11:43:11AM -0800, Bill Wendling wrote:
> What Segher said. :-) Also, if you reverse the comparison, you'll get
> a build error.

But that means your patch is the wrong way around?

-   .ifgt (label##4b- label##3b)-(label##2b- label##1b);\
-   .error "Feature section else case larger than body";\
-   .endif; \
+   .org . - ((label##4b-label##3b) > (label##2b-label##1b)); \

It should be a + in that last line, not a -.  Was this tested?


Segher
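
For readers following along at home: gas evaluates a true comparison to -1
(all bits set), not 1, and .org can never move the location counter backwards.
That combination is what turns the .org line into a build-time assertion, and
why the sign in front of the parenthesised comparison matters. A tiny
standalone illustration (not the kernel macro itself) that can be fed straight
to the assembler:

        /* (2 > 1) evaluates to -1 in gas, (1 > 2) to 0 */
        .org . + (2 > 1)        /* . - 1: error, .org moved backwards  */
        .org . - (2 > 1)        /* . + 1: silently pads, never fires   */
        .org . + (1 > 2)        /* . + 0: no-op, the "else fits" case  */

So with the '-' form the "else case larger than body" condition pads a byte
instead of failing the build, which is indeed the wrong way around.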


Re: [PATCH v3 2/2] powerpc/ptrace: Hard wire PT_SOFTE value to 1 in gpr_get() too

2020-11-23 Thread Oleg Nesterov
Christophe, et al,

So what?

Are you going to push your change or should I re-send 1-2 without
whitespace cleanups?

On 11/19, Oleg Nesterov wrote:
>
> On 11/19, Christophe Leroy wrote:
> >
> > I think the following should work, and not require the first patch (compile
> > tested only).
> >
> > --- a/arch/powerpc/kernel/ptrace/ptrace-view.c
> > +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c
> > @@ -234,9 +234,21 @@ static int gpr_get(struct task_struct *target, const
> > struct user_regset *regset,
> > BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
> >  offsetof(struct pt_regs, msr) + sizeof(long));
> > 
> > +#ifdef CONFIG_PPC64
> > +   membuf_write(&to, &target->thread.regs->orig_gpr3,
> > +offsetof(struct pt_regs, softe) - offsetof(struct pt_regs,
> > orig_gpr3));
> > +   membuf_store(&to, 1UL);
> > +
> > +   BUILD_BUG_ON(offsetof(struct pt_regs, trap) !=
> > +offsetof(struct pt_regs, softe) + sizeof(long));
> > +
> > +   membuf_write(&to, &target->thread.regs->trap,
> > +sizeof(struct user_pt_regs) - offsetof(struct pt_regs, 
> > trap));
> > +#else
> > membuf_write(&to, &target->thread.regs->orig_gpr3,
> > sizeof(struct user_pt_regs) -
> > offsetof(struct pt_regs, orig_gpr3));
> > +#endif
> > return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) -
> >  sizeof(struct user_pt_regs));
> >  }
> 
> Probably yes.
> 
> This mirrors the previous patch I sent 
> (https://lore.kernel.org/lkml/20190917143753.ga12...@redhat.com/)
> and this is exactly what I tried to avoid, we can make a simpler fix now.
> 
> But let me repeat, I agree with any fix even if IMO my version simplifies the
> code, just
> commit this change and let's forget this problem.
> 
> Oleg.



Re: [PATCH v2 04/19] powerpc/perf: move perf irq/nmi handling details into traps.c

2020-11-23 Thread Athira Rajeev



> On 11-Nov-2020, at 3:13 PM, Nicholas Piggin  wrote:
> 
> This is required in order to allow more significant differences between
> NMI type interrupt handlers and regular asynchronous handlers.
> 
> Signed-off-by: Nicholas Piggin 
> ---
> arch/powerpc/kernel/traps.c  | 31 +++-
> arch/powerpc/perf/core-book3s.c  | 35 ++--
> arch/powerpc/perf/core-fsl-emb.c | 25 ---
> 3 files changed, 32 insertions(+), 59 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
> index 902fcbd1a778..7dda72eb97cc 100644
> --- a/arch/powerpc/kernel/traps.c
> +++ b/arch/powerpc/kernel/traps.c
> @@ -1919,11 +1919,40 @@ void vsx_unavailable_tm(struct pt_regs *regs)
> }
> #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
> 
> -void performance_monitor_exception(struct pt_regs *regs)
> +static void performance_monitor_exception_nmi(struct pt_regs *regs)
> +{
> + nmi_enter();
> +
> + __this_cpu_inc(irq_stat.pmu_irqs);
> +
> + perf_irq(regs);
> +
> + nmi_exit();
> +}
> +
> +static void performance_monitor_exception_async(struct pt_regs *regs)
> {
> + irq_enter();
> +
>   __this_cpu_inc(irq_stat.pmu_irqs);
> 
>   perf_irq(regs);
> +
> + irq_exit();
> +}
> +
> +void performance_monitor_exception(struct pt_regs *regs)
> +{
> + /*
> +  * On 64-bit, if perf interrupts hit in a local_irq_disable
> +  * (soft-masked) region, we consider them as NMIs. This is required to
> +  * prevent hash faults on user addresses when reading callchains (and
> +  * looks better from an irq tracing perspective).
> +  */
> + if (IS_ENABLED(CONFIG_PPC64) && unlikely(arch_irq_disabled_regs(regs)))
> + performance_monitor_exception_nmi(regs);
> + else
> + performance_monitor_exception_async(regs);
> }
> 
> #ifdef CONFIG_PPC_ADV_DEBUG_REGS
> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
> index 08643cba1494..9fd8cae09218 100644
> --- a/arch/powerpc/perf/core-book3s.c
> +++ b/arch/powerpc/perf/core-book3s.c
> @@ -109,10 +109,6 @@ static inline void perf_read_regs(struct pt_regs *regs)
> {
>   regs->result = 0;
> }
> -static inline int perf_intr_is_nmi(struct pt_regs *regs)
> -{
> - return 0;
> -}
> 
> static inline int siar_valid(struct pt_regs *regs)
> {
> @@ -328,15 +324,6 @@ static inline void perf_read_regs(struct pt_regs *regs)
>   regs->result = use_siar;
> }
> 
> -/*
> - * If interrupts were soft-disabled when a PMU interrupt occurs, treat
> - * it as an NMI.
> - */
> -static inline int perf_intr_is_nmi(struct pt_regs *regs)
> -{
> - return (regs->softe & IRQS_DISABLED);
> -}
> -

Hi Nick,

arch_irq_disabled_regs() checks whether the regs->softe value has
IRQS_DISABLED set. Core-book3s uses the same logic in perf_intr_is_nmi()
to check whether it is an NMI. With the changes in this patch, if I
understood correctly, we will do the irq/nmi handling in traps.c rather
than in the PMI interrupt handler.  But can you please help me understand
the perf weirdness (sometimes NMI, sometimes not) mentioned in the cover
letter that we are fixing with this change?

Thanks
Athira
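
For context on the softe check being discussed: on 64-bit,
arch_irq_disabled_regs() boils down to the same test that the removed
perf_intr_is_nmi() open-coded. Roughly, from memory (the authoritative
definition lives in arch/powerpc/include/asm/hw_irq.h):

  static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
  {
          /* interrupts were soft-disabled when these regs were saved */
          return regs->softe & IRQS_DISABLED;
  }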

> /*
>  * On processors like P7+ that have the SIAR-Valid bit, marked instructions
>  * must be sampled only if the SIAR-valid bit is set.
> @@ -2224,7 +2211,6 @@ static void __perf_event_interrupt(struct pt_regs *regs)
>   struct perf_event *event;
>   unsigned long val[8];
>   int found, active;
> - int nmi;
> 
>   if (cpuhw->n_limited)
>   freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5),
> @@ -2232,18 +2218,6 @@ static void __perf_event_interrupt(struct pt_regs 
> *regs)
> 
>   perf_read_regs(regs);
> 
> - /*
> -  * If perf interrupts hit in a local_irq_disable (soft-masked) region,
> -  * we consider them as NMIs. This is required to prevent hash faults on
> -  * user addresses when reading callchains. See the NMI test in
> -  * do_hash_page.
> -  */
> - nmi = perf_intr_is_nmi(regs);
> - if (nmi)
> - nmi_enter();
> - else
> - irq_enter();
> -
>   /* Read all the PMCs since we'll need them a bunch of times */
>   for (i = 0; i < ppmu->n_counter; ++i)
>   val[i] = read_pmc(i + 1);
> @@ -2289,8 +2263,8 @@ static void __perf_event_interrupt(struct pt_regs *regs)
>   }
>   }
>   }
> - if (!found && !nmi && printk_ratelimit())
> - printk(KERN_WARNING "Can't find PMC that caused IRQ\n");
> + if (unlikely(!found) && !arch_irq_disabled_regs(regs))
> + printk_ratelimited(KERN_WARNING "Can't find PMC that caused 
> IRQ\n");
> 
>   /*
>* Reset MMCR0 to its normal value.  This will set PMXE and
> @@ -2300,11 +2274,6 @@ static void __perf_event_interrupt(struct pt_regs 
> *regs)
>* we get back out of this interrupt.
>*/
>   

Re: Linux kernel: powerpc: RTAS calls can be used to compromise kernel integrity

2020-11-23 Thread Andrew Donnellan

On 9/10/20 12:20 pm, Andrew Donnellan wrote:
The Linux kernel for powerpc has an issue with the Run-Time Abstraction 
Services (RTAS) interface, allowing root (or CAP_SYS_ADMIN users) in a 
VM to overwrite some parts of memory, including kernel memory.


This issue impacts guests running on top of PowerVM or KVM hypervisors 
(pseries platform), and does *not* impact bare-metal machines (powernv 
platform).

CVE-2020-2 has been assigned.

--
Andrew Donnellan  OzLabs, ADL Canberra
a...@linux.ibm.com IBM Australia Limited


Re: [PATCH] powerpc/perf: Fix crash with 'is_sier_available' when pmu is not set

2020-11-23 Thread Athira Rajeev



> On 23-Nov-2020, at 4:49 PM, Michael Ellerman  wrote:
> 
> Hi Athira,
> 
> Athira Rajeev  writes:
>> On systems without any platform specific PMU driver support registered or
>> Generic Compat PMU support registered,
> 
> The compat PMU is registered just like other PMUs, so I don't see how we
> can crash like this if the compat PMU is active?
> 
> ie. if we're using the compat PMU then ppmu will be non-NULL and point
> to generic_compat_pmu.

Hi Michael,

Thanks for checking the patch.

The crash happens on systems which have neither compat PMU support nor a
platform-specific PMU driver registered. This happens when the distro has
neither the PMU driver support for that platform nor the generic
"compat-mode" performance monitoring driver support.

So in such cases, since the compat PMU is inactive, ppmu is not set and
the access results in a crash. Sorry for the confusion with my first
line. I will correct it.
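
To make the scenario concrete: ppmu only becomes non-NULL when some backend
(a platform PMU driver or generic_compat_pmu) registers itself with
core-book3s. Roughly, from memory (the exact code is in
arch/powerpc/perf/core-book3s.c):

  static struct power_pmu *ppmu;

  int register_power_pmu(struct power_pmu *pmu)
  {
          if (ppmu)
                  return -EBUSY;          /* only one backend at a time */

          ppmu = pmu;
          /* remainder of the registration elided in this sketch */
          return 0;
  }

If no backend ever calls this, is_sier_available() dereferences a NULL ppmu,
which is the crash above.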

> 
>> running 'perf record' with
>> —intr-regs  will crash ( perf record -I  ).
>> 
>> The relevant portion from crash logs and Call Trace:
>> 
>> Unable to handle kernel paging request for data at address 0x0068
>> Faulting instruction address: 0xc013eb18
>> Oops: Kernel access of bad area, sig: 11 [#1]
>> CPU: 2 PID: 13435 Comm: kill Kdump: loaded Not tainted 
>> 4.18.0-193.el8.ppc64le #1
>> NIP:  c013eb18 LR: c0139f2c CTR: c0393d80
>> REGS: c004a07ab4f0 TRAP: 0300   Not tainted  (4.18.0-193.el8.ppc64le)
>> NIP [c013eb18] is_sier_available+0x18/0x30
>> LR [c0139f2c] perf_reg_value+0x6c/0xb0
>> Call Trace:
>> [c004a07ab770] [c004a07ab7c8] 0xc004a07ab7c8 (unreliable)
>> [c004a07ab7a0] [c03aa77c] perf_output_sample+0x60c/0xac0
>> [c004a07ab840] [c03ab3f0] perf_event_output_forward+0x70/0xb0
>> [c004a07ab8c0] [c039e208] __perf_event_overflow+0x88/0x1a0
>> [c004a07ab910] [c039e42c] perf_swevent_hrtimer+0x10c/0x1d0
>> [c004a07abc50] [c0228b9c] __hrtimer_run_queues+0x17c/0x480
>> [c004a07abcf0] [c022aaf4] hrtimer_interrupt+0x144/0x520
>> [c004a07abdd0] [c002a864] timer_interrupt+0x104/0x2f0
>> [c004a07abe30] [c00091c4] decrementer_common+0x114/0x120
>> 
>> When perf record session started with "-I" option, capture registers
>  ^
>  is
> 
>> via intr-regs,
> 
> "intr-regs" is just the full name for the -I option, so that kind of
> repeats itself.
> 
>> on each sample ‘is_sier_available()'i is called to check
>  ^
>  extra i
> 
> The single quotes around is_sier_available() aren't necessary IMO.
> 
>> for the SIER ( Sample Instruction Event Register) availability in the
>^
>stray space
>> platform. This function in core-book3s access 'ppmu->flags'. If platform
>   ^ ^
>   esa
>> specific pmu driver is not registered, ppmu is set to null and accessing
>   ^^
>   PMU  NULL
>> its members results in crash. Patch fixes this by returning false in
>^
>a
>> 'is_sier_available()' if 'ppmu' is not set.
> 
> Use the imperative mood for the last sentence which says what the patch
> does:
> 
>  Fix the crash by returning false in is_sier_available() if ppmu is not set.

Sure,  I will make all these changes as suggested.

Thanks
Athira
> 
> 
>> Fixes: 333804dc3b7a ("powerpc/perf: Update perf_regs structure to include 
>> SIER")
>> Reported-by: Sachin Sant 
>> Signed-off-by: Athira Rajeev 
>> ---
>> arch/powerpc/perf/core-book3s.c | 3 +++
>> 1 file changed, 3 insertions(+)
>> 
>> diff --git a/arch/powerpc/perf/core-book3s.c 
>> b/arch/powerpc/perf/core-book3s.c
>> index 08643cb..1de4770 100644
>> --- a/arch/powerpc/perf/core-book3s.c
>> +++ b/arch/powerpc/perf/core-book3s.c
>> @@ -137,6 +137,9 @@ static void pmao_restore_workaround(bool ebb) { }
>> 
>> bool is_sier_available(void)
>> {
>> +if (!ppmu)
>> +return false;
>> +
>>  if (ppmu->flags & PPMU_HAS_SIER)
>>  return true;
>> 
>> -- 
>> 1.8.3.1
> 
> 
> cheers



Re: [PATCH] powerpc/perf: Fix crash with 'is_sier_available' when pmu is not set

2020-11-23 Thread Michael Ellerman
Hi Athira,

Athira Rajeev  writes:
> On systems without any platform specific PMU driver support registered or
> Generic Compat PMU support registered,

The compat PMU is registered just like other PMUs, so I don't see how we
can crash like this if the compat PMU is active?

ie. if we're using the compat PMU then ppmu will be non-NULL and point
to generic_compat_pmu.

> running 'perf record' with
> —intr-regs  will crash ( perf record -I  ).
>
> The relevant portion from crash logs and Call Trace:
>
> Unable to handle kernel paging request for data at address 0x0068
> Faulting instruction address: 0xc013eb18
> Oops: Kernel access of bad area, sig: 11 [#1]
> CPU: 2 PID: 13435 Comm: kill Kdump: loaded Not tainted 4.18.0-193.el8.ppc64le 
> #1
> NIP:  c013eb18 LR: c0139f2c CTR: c0393d80
> REGS: c004a07ab4f0 TRAP: 0300   Not tainted  (4.18.0-193.el8.ppc64le)
> NIP [c013eb18] is_sier_available+0x18/0x30
> LR [c0139f2c] perf_reg_value+0x6c/0xb0
> Call Trace:
> [c004a07ab770] [c004a07ab7c8] 0xc004a07ab7c8 (unreliable)
> [c004a07ab7a0] [c03aa77c] perf_output_sample+0x60c/0xac0
> [c004a07ab840] [c03ab3f0] perf_event_output_forward+0x70/0xb0
> [c004a07ab8c0] [c039e208] __perf_event_overflow+0x88/0x1a0
> [c004a07ab910] [c039e42c] perf_swevent_hrtimer+0x10c/0x1d0
> [c004a07abc50] [c0228b9c] __hrtimer_run_queues+0x17c/0x480
> [c004a07abcf0] [c022aaf4] hrtimer_interrupt+0x144/0x520
> [c004a07abdd0] [c002a864] timer_interrupt+0x104/0x2f0
> [c004a07abe30] [c00091c4] decrementer_common+0x114/0x120
>
> When perf record session started with "-I" option, capture registers
  ^
  is

> via intr-regs,

"intr-regs" is just the full name for the -I option, so that kind of
repeats itself.

> on each sample ‘is_sier_available()'i is called to check
  ^
  extra i

The single quotes around is_sier_available() aren't necessary IMO.

> for the SIER ( Sample Instruction Event Register) availability in the
^
stray space
> platform. This function in core-book3s access 'ppmu->flags'. If platform
   ^ ^
   esa
> specific pmu driver is not registered, ppmu is set to null and accessing
   ^^
   PMU  NULL
> its members results in crash. Patch fixes this by returning false in
^
a
> 'is_sier_available()' if 'ppmu' is not set.

Use the imperative mood for the last sentence which says what the patch
does:

  Fix the crash by returning false in is_sier_available() if ppmu is not set.


> Fixes: 333804dc3b7a ("powerpc/perf: Update perf_regs structure to include 
> SIER")
> Reported-by: Sachin Sant 
> Signed-off-by: Athira Rajeev 
> ---
>  arch/powerpc/perf/core-book3s.c | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
> index 08643cb..1de4770 100644
> --- a/arch/powerpc/perf/core-book3s.c
> +++ b/arch/powerpc/perf/core-book3s.c
> @@ -137,6 +137,9 @@ static void pmao_restore_workaround(bool ebb) { }
>  
>  bool is_sier_available(void)
>  {
> + if (!ppmu)
> + return false;
> +
>   if (ppmu->flags & PPMU_HAS_SIER)
>   return true;
>  
> -- 
> 1.8.3.1


cheers


Re: [PATCH 1/3] perf/core: Flush PMU internal buffers for per-CPU events

2020-11-23 Thread Michael Ellerman
Namhyung Kim  writes:
> Hi Peter and Kan,
>
> (Adding PPC folks)
>
> On Tue, Nov 17, 2020 at 2:01 PM Namhyung Kim  wrote:
>>
>> Hello,
>>
>> On Thu, Nov 12, 2020 at 4:54 AM Liang, Kan  wrote:
>> >
>> >
>> >
>> > On 11/11/2020 11:25 AM, Peter Zijlstra wrote:
>> > > On Mon, Nov 09, 2020 at 09:49:31AM -0500, Liang, Kan wrote:
>> > >
>> > >> - When the large PEBS was introduced (9c964efa4330), the sched_task() 
>> > >> should
>> > >> be invoked to flush the PEBS buffer in each context switch. However, The
>> > >> perf_sched_events in account_event() is not updated accordingly. The
>> > >> perf_event_task_sched_* never be invoked for a pure per-CPU context. 
>> > >> Only
>> > >> per-task event works.
>> > >> At that time, the perf_pmu_sched_task() is outside of
>> > >> perf_event_context_sched_in/out. It means that perf has to double
>> > >> perf_pmu_disable() for per-task event.
>> > >
>> > >> - The patch 1 tries to fix broken per-CPU events. The CPU context 
>> > >> cannot be
>> > >> retrieved from the task->perf_event_ctxp. So it has to be tracked in the
>> > >> sched_cb_list. Yes, the code is very similar to the original codes, but 
>> > >> it
>> > >> is actually the new code for per-CPU events. The optimization for 
>> > >> per-task
>> > >> events is still kept.
>> > >>For the case, which has both a CPU context and a task context, yes, 
>> > >> the
>> > >> __perf_pmu_sched_task() in this patch is not invoked. Because the
>> > >> sched_task() only need to be invoked once in a context switch. The
>> > >> sched_task() will be eventually invoked in the task context.
>> > >
>> > > The thing is; your first two patches rely on PERF_ATTACH_SCHED_CB and
>> > > only set that for large pebs. Are you sure the other users (Intel LBR
>> > > and PowerPC BHRB) don't need it?
>> >
>> > I didn't set it for LBR, because the perf_sched_events is always enabled
>> > for LBR. But, yes, we should explicitly set the PERF_ATTACH_SCHED_CB
>> > for LBR.
>> >
>> > if (has_branch_stack(event))
>> > inc = true;
>> >
>> > >
>> > > If they indeed do not require the pmu::sched_task() callback for CPU
>> > > events, then I still think the whole perf_sched_cb_{inc,dec}() interface
>> >
>> > No, LBR requires the pmu::sched_task() callback for CPU events.
>> >
>> > Now, The LBR registers have to be reset in sched in even for CPU events.
>> >
>> > To fix the shorter LBR callstack issue for CPU events, we also need to
>> > save/restore LBRs in pmu::sched_task().
>> > https://lore.kernel.org/lkml/1578495789-95006-4-git-send-email-kan.li...@linux.intel.com/
>> >
>> > > is confusing at best.
>> > >
>> > > Can't we do something like this instead?
>> > >
>> > I think the below patch may have two issues.
>> > - PERF_ATTACH_SCHED_CB is required for LBR (maybe PowerPC BHRB as well) 
>> > now.
>> > - We may disable the large PEBS later if not all PEBS events support
>> > large PEBS. The PMU need a way to notify the generic code to decrease
>> > the nr_sched_task.
>>
>> Any updates on this?  I've reviewed and tested Kan's patches
>> and they all look good.
>>
>> Maybe we can talk to PPC folks to confirm the BHRB case?
>
> Can we move this forward?  I saw patch 3/3 also adds PERF_ATTACH_SCHED_CB
> for PowerPC too.  But it'd be nice if ppc folks can confirm the change.

Sorry I've read the whole thread, but I'm still not entirely sure I
understand the question.

cheers


Re: [PATCH] powerpc/perf: Fix crash with 'is_sier_available' when pmu is not set

2020-11-23 Thread Sachin Sant
> When perf record session started with "-I" option, capture registers
> via intr-regs, on each sample ‘is_sier_available()'i is called to check
> for the SIER ( Sample Instruction Event Register) availability in the
> platform. This function in core-book3s access 'ppmu->flags'. If platform
> specific pmu driver is not registered, ppmu is set to null and accessing
> its members results in crash. Patch fixes this by returning false in
> 'is_sier_available()' if 'ppmu' is not set.
> 
> Fixes: 333804dc3b7a ("powerpc/perf: Update perf_regs structure to include 
> SIER")
> Reported-by: Sachin Sant 
> Signed-off-by: Athira Rajeev 

Tested-by: Sachin Sant 

Thanks
-Sachin

Re: [PATCH V2 4/5] ocxl: Add mmu notifier

2020-11-23 Thread Frederic Barrat




On 20/11/2020 18:32, Christophe Lombard wrote:

Add invalidate_range mmu notifier, when required (ATSD access of MMIO
registers is available), to initiate TLB invalidation commands.
For the time being, the ATSD0 set of registers is used by default.

The pasid and bdf values have to be configured in the Process Element
Entry.
The PEE must be set up to match the BDF/PASID of the AFU.

Signed-off-by: Christophe Lombard 
---
  drivers/misc/ocxl/link.c | 58 +++-
  1 file changed, 57 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c
index 20444db8a2bb..100bdfe9ec37 100644
--- a/drivers/misc/ocxl/link.c
+++ b/drivers/misc/ocxl/link.c
@@ -2,8 +2,10 @@
  // Copyright 2017 IBM Corp.
  #include 
  #include 
+#include 
  #include 
  #include 
+#include 
  #include 
  #include 
  #include 
@@ -33,6 +35,7 @@

  #define SPA_PE_VALID  0x8000

+struct ocxl_link;

  struct pe_data {
struct mm_struct *mm;
@@ -41,6 +44,8 @@ struct pe_data {
/* opaque pointer to be passed to the above callback */
void *xsl_err_data;
struct rcu_head rcu;
+   struct ocxl_link *link;
+   struct mmu_notifier mmu_notifier;
  };

  struct spa {
@@ -83,6 +88,8 @@ struct ocxl_link {
int domain;
int bus;
int dev;
+   void __iomem *arva; /* ATSD register virtual address */
+   spinlock_t atsd_lock;   /* to serialize shootdowns */
atomic_t irq_available;
struct spa *spa;
void *platform_data;
@@ -403,6 +410,11 @@ static int alloc_link(struct pci_dev *dev, int PE_mask, 
struct ocxl_link **out_l
if (rc)
goto err_xsl_irq;

+   rc = pnv_ocxl_map_lpar(dev, mfspr(SPRN_LPID), 0,
+ &link->arva);
+   if (!rc)
+   spin_lock_init(&link->atsd_lock);
+



We could use a comment to say that if arva is NULL, then we don't need MMIO
shootdowns and we rely on hardware snooping.


Also, we could always initialize the spin lock; it doesn't hurt and makes
the code more readable.


  Fred
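
Something along those lines, perhaps (untested sketch of the two suggestions
combined):

        /*
         * If the LPAR has no ATSD register set (e.g. on POWER9), arva
         * stays NULL: no MMIO shootdowns are needed and we rely on
         * hardware snooping of the CPU TLB invalidations.
         */
        if (pnv_ocxl_map_lpar(dev, mfspr(SPRN_LPID), 0, &link->arva))
                link->arva = NULL;

        /* Always valid, and cheaper to reason about than a lock that
         * only exists when arva does.
         */
        spin_lock_init(&link->atsd_lock);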



*out_link = link;
return 0;

@@ -454,6 +466,11 @@ static void release_xsl(struct kref *ref)
  {
struct ocxl_link *link = container_of(ref, struct ocxl_link, ref);

+   if (link->arva) {
+   pnv_ocxl_unmap_lpar(&link->arva);
+   link->arva = NULL;
+   }
+
list_del(&link->list);
/* call platform code before releasing data */
pnv_ocxl_spa_release(link->platform_data);
@@ -470,6 +487,26 @@ void ocxl_link_release(struct pci_dev *dev, void 
*link_handle)
  }
  EXPORT_SYMBOL_GPL(ocxl_link_release);

+static void invalidate_range(struct mmu_notifier *mn,
+struct mm_struct *mm,
+unsigned long start, unsigned long end)
+{
+   struct pe_data *pe_data = container_of(mn, struct pe_data, 
mmu_notifier);
+   struct ocxl_link *link = pe_data->link;
+   unsigned long addr, pid, page_size = PAGE_SIZE;
+
+   pid = mm->context.id;
+
+   spin_lock(&link->atsd_lock);
+   for (addr = start; addr < end; addr += page_size)
+   pnv_ocxl_tlb_invalidate(&link->arva, pid, addr);
+   spin_unlock(&link->atsd_lock);
+}
+
+static const struct mmu_notifier_ops ocxl_mmu_notifier_ops = {
+   .invalidate_range = invalidate_range,
+};
+
  static u64 calculate_cfg_state(bool kernel)
  {
u64 state;
@@ -526,6 +563,8 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 
pidr, u32 tidr,
pe_data->mm = mm;
pe_data->xsl_err_cb = xsl_err_cb;
pe_data->xsl_err_data = xsl_err_data;
+   pe_data->link = link;
+   pe_data->mmu_notifier.ops = &ocxl_mmu_notifier_ops;

memset(pe, 0, sizeof(struct ocxl_process_element));
pe->config_state = cpu_to_be64(calculate_cfg_state(pidr == 0));
@@ -542,8 +581,16 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 
pidr, u32 tidr,
 * by the nest MMU. If we have a kernel context, TLBIs are
 * already global.
 */
-   if (mm)
+   if (mm) {
mm_context_add_copro(mm);
+   if (link->arva) {
+   /* Use MMIO registers for the TLB Invalidate
+* operations.
+*/
+   mmu_notifier_register(&pe_data->mmu_notifier, mm);
+   }
+   }
+
/*
 * Barrier is to make sure PE is visible in the SPA before it
 * is used by the device. It also helps with the global TLBI
@@ -674,6 +721,15 @@ int ocxl_link_remove_pe(void *link_handle, int pasid)
WARN(1, "Couldn't find pe data when removing PE\n");
} else {
if (pe_data->mm) {
+   if (link->arva) {
+   mmu_notifier_unregister(&pe_data->mmu_notifier,
+   pe_data->mm);
+   

Re: [PATCH V2 3/5] ocxl: Update the Process Element Entry

2020-11-23 Thread Frederic Barrat




On 20/11/2020 18:32, Christophe Lombard wrote:

To complete the MMIO based mechanism, the fields: PASID, bus, device and
function of the Process Element Entry have to be filled. (See
OpenCAPI Power Platform Architecture document)

Hypervisor Process Element Entry
Word    0  1 ..... 7  8 .... 12  13 .. 15  16 .. 19  20 ...... 31
  0     OSL Configuration State (0:31)
  1     OSL Configuration State (32:63)
  2     PASID                                        | Reserved
  3     Bus         | Device    | Function | Reserved
  4     Reserved
  5     Reserved
  6

Signed-off-by: Christophe Lombard 
---
  drivers/misc/ocxl/context.c   | 4 +++-
  drivers/misc/ocxl/link.c  | 4 +++-
  drivers/misc/ocxl/ocxl_internal.h | 4 +++-
  drivers/scsi/cxlflash/ocxl_hw.c   | 6 --
  include/misc/ocxl.h   | 2 +-
  5 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c
index c21f65a5c762..9eb0d93b01c6 100644
--- a/drivers/misc/ocxl/context.c
+++ b/drivers/misc/ocxl/context.c
@@ -70,6 +70,7 @@ int ocxl_context_attach(struct ocxl_context *ctx, u64 amr, 
struct mm_struct *mm)
  {
int rc;
unsigned long pidr = 0;
+   struct pci_dev *dev;

// Locks both status & tidr
mutex_lock(>status_mutex);
@@ -81,8 +82,9 @@ int ocxl_context_attach(struct ocxl_context *ctx, u64 amr, 
struct mm_struct *mm)
if (mm)
pidr = mm->context.id;

+   dev = to_pci_dev(ctx->afu->fn->dev.parent);
rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid, pidr, ctx->tidr,
- amr, mm, xsl_fault_error, ctx);
+ amr, pci_dev_id(dev), mm, xsl_fault_error, ctx);
if (rc)
goto out;

diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c
index fd73d3bc0eb6..20444db8a2bb 100644
--- a/drivers/misc/ocxl/link.c
+++ b/drivers/misc/ocxl/link.c
@@ -494,7 +494,7 @@ static u64 calculate_cfg_state(bool kernel)
  }

  int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
-   u64 amr, struct mm_struct *mm,
+   u64 amr, u64 bdf, struct mm_struct *mm,



bdf could/should be a u16, since that's per the PCI spec.

  Fred
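
For reference: a bus/device/function tuple is 8 + 5 + 3 = 16 bits, which is
exactly what pci_dev_id()/PCI_DEVID() already hand back as a u16, so this is
purely a type tightening. Sketch of the resulting prototype, assuming nothing
else in the call chain needs the wider type:

  int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
                       u64 amr, u16 bdf, struct mm_struct *mm,
                       void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr),
                       void *xsl_err_data);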



void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr),
void *xsl_err_data)
  {
@@ -529,6 +529,8 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 
pidr, u32 tidr,

memset(pe, 0, sizeof(struct ocxl_process_element));
pe->config_state = cpu_to_be64(calculate_cfg_state(pidr == 0));
+   pe->pasid = cpu_to_be32(pasid << (31 - 19));
+   pe->bdf = cpu_to_be32(bdf << (31 - 15));
pe->lpid = cpu_to_be32(mfspr(SPRN_LPID));
pe->pid = cpu_to_be32(pidr);
pe->tid = cpu_to_be32(tidr);
diff --git a/drivers/misc/ocxl/ocxl_internal.h 
b/drivers/misc/ocxl/ocxl_internal.h
index 0bad0a123af6..c9ce2af21d6f 100644
--- a/drivers/misc/ocxl/ocxl_internal.h
+++ b/drivers/misc/ocxl/ocxl_internal.h
@@ -84,7 +84,9 @@ struct ocxl_context {

  struct ocxl_process_element {
__be64 config_state;
-   __be32 reserved1[11];
+   __be32 pasid;
+   __be32 bdf;
+   __be32 reserved1[9];
__be32 lpid;
__be32 tid;
__be32 pid;
diff --git a/drivers/scsi/cxlflash/ocxl_hw.c b/drivers/scsi/cxlflash/ocxl_hw.c
index e4e0d767b98e..244fc27215dc 100644
--- a/drivers/scsi/cxlflash/ocxl_hw.c
+++ b/drivers/scsi/cxlflash/ocxl_hw.c
@@ -329,6 +329,7 @@ static int start_context(struct ocxlflash_context *ctx)
struct ocxl_hw_afu *afu = ctx->hw_afu;
struct ocxl_afu_config *acfg = &afu->acfg;
void *link_token = afu->link_token;
+   struct pci_dev *pdev = afu->pdev;
struct device *dev = afu->dev;
bool master = ctx->master;
struct mm_struct *mm;
@@ -360,8 +361,9 @@ static int start_context(struct ocxlflash_context *ctx)
mm = current->mm;
}

-   rc = ocxl_link_add_pe(link_token, ctx->pe, pid, 0, 0, mm,
- ocxlflash_xsl_fault, ctx);
+   rc = ocxl_link_add_pe(link_token, ctx->pe, pid, 0, 0,
+ pci_dev_id(pdev), mm, ocxlflash_xsl_fault,
+ ctx);
if (unlikely(rc)) {
dev_err(dev, "%s: ocxl_link_add_pe failed rc=%d\n",
__func__, rc);
diff --git a/include/misc/ocxl.h b/include/misc/ocxl.h
index e013736e275d..d0f101f428dd 100644
--- a/include/misc/ocxl.h
+++ b/include/misc/ocxl.h
@@ -447,7 +447,7 @@ void ocxl_link_release(struct pci_dev *dev, void 
*link_handle);
   * defined
   */
  int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
-   u64 amr, struct mm_struct *mm,
+   u64 amr, u64 bdf, struct mm_struct *mm,
void (*xsl_err_cb)(void 

Re: [PATCH V2 2/5] ocxl: Initiate a TLB invalidate command

2020-11-23 Thread Frederic Barrat




On 20/11/2020 18:32, Christophe Lombard wrote:

When a TLB Invalidate is required for the Logical Partition, the following
sequence has to be performed:

1. Load MMIO ATSD AVA register with the necessary value, if required.
2. Write the MMIO ATSD launch register to initiate the TLB Invalidate
command.
3. Poll the MMIO ATSD status register to determine when the TLB Invalidate
has been completed.

Signed-off-by: Christophe Lombard 
---
  arch/powerpc/include/asm/pnv-ocxl.h   | 50 
  arch/powerpc/platforms/powernv/ocxl.c | 55 +++
  2 files changed, 105 insertions(+)

diff --git a/arch/powerpc/include/asm/pnv-ocxl.h 
b/arch/powerpc/include/asm/pnv-ocxl.h
index 3f38aed7100c..9c90e87e7659 100644
--- a/arch/powerpc/include/asm/pnv-ocxl.h
+++ b/arch/powerpc/include/asm/pnv-ocxl.h
@@ -3,12 +3,59 @@
  #ifndef _ASM_PNV_OCXL_H
  #define _ASM_PNV_OCXL_H

+#include 
  #include 

  #define PNV_OCXL_TL_MAX_TEMPLATE63
  #define PNV_OCXL_TL_BITS_PER_RATE   4
  #define PNV_OCXL_TL_RATE_BUF_SIZE   ((PNV_OCXL_TL_MAX_TEMPLATE+1) * 
PNV_OCXL_TL_BITS_PER_RATE / 8)

+#define PNV_OCXL_ATSD_TIMEOUT  1
+
+/* TLB Management Instructions */
+#define PNV_OCXL_ATSD_LNCH 0x00
+/* Radix Invalidate */
+#define   PNV_OCXL_ATSD_LNCH_R PPC_BIT(0)
+/* Radix Invalidation Control
+ * 0b00 Just invalidate TLB.
+ * 0b01 Invalidate just Page Walk Cache.
+ * 0b10 Invalidate TLB, Page Walk Cache, and any
+ * caching of Partition and Process Table Entries.
+ */
+#define   PNV_OCXL_ATSD_LNCH_RIC   PPC_BITMASK(1, 2)
+/* Number and Page Size of translations to be invalidated */
+#define   PNV_OCXL_ATSD_LNCH_LPPPC_BITMASK(3, 10)
+/* Invalidation Criteria
+ * 0b00 Invalidate just the target VA.
+ * 0b01 Invalidate matching PID.
+ */
+#define   PNV_OCXL_ATSD_LNCH_ISPPC_BITMASK(11, 12)
+/* 0b1: Process Scope, 0b0: Partition Scope */
+#define   PNV_OCXL_ATSD_LNCH_PRS   PPC_BIT(13)
+/* Invalidation Flag */
+#define   PNV_OCXL_ATSD_LNCH_B PPC_BIT(14)
+/* Actual Page Size to be invalidated
+ * 000 4KB
+ * 101 64KB
+ * 001 2MB
+ * 010 1GB
+ */
+#define   PNV_OCXL_ATSD_LNCH_APPPC_BITMASK(15, 17)
+/* Defines the large page select
+ * L=0b0 for 4KB pages
+ * L=0b1 for large pages)
+ */
+#define   PNV_OCXL_ATSD_LNCH_L PPC_BIT(18)
+/* Process ID */
+#define   PNV_OCXL_ATSD_LNCH_PID   PPC_BITMASK(19, 38)
+/* NoFlush – Assumed to be 0b0 */
+#define   PNV_OCXL_ATSD_LNCH_F PPC_BIT(39)
+#define   PNV_OCXL_ATSD_LNCH_OCAPI_SLBIPPC_BIT(40)
+#define   PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON   PPC_BIT(41)
+#define PNV_OCXL_ATSD_AVA  0x08
+#define   PNV_OCXL_ATSD_AVA_AVAPPC_BITMASK(0, 51)
+#define PNV_OCXL_ATSD_STAT 0x10
+
  int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled, u16 
*supported);
  int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count);

@@ -31,4 +78,7 @@ int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, 
int pe_handle);
  int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid,
  uint64_t lpcr, void __iomem **arva);
  void pnv_ocxl_unmap_lpar(void __iomem **arva);
+void pnv_ocxl_tlb_invalidate(void __iomem **arva,
+unsigned long pid,
+unsigned long addr);
  #endif /* _ASM_PNV_OCXL_H */
diff --git a/arch/powerpc/platforms/powernv/ocxl.c 
b/arch/powerpc/platforms/powernv/ocxl.c
index bc20cf867900..07878496954b 100644
--- a/arch/powerpc/platforms/powernv/ocxl.c
+++ b/arch/powerpc/platforms/powernv/ocxl.c
@@ -531,3 +531,58 @@ void pnv_ocxl_unmap_lpar(void __iomem **arva)
iounmap(*arva);
  }
  EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_lpar);
+
+void pnv_ocxl_tlb_invalidate(void __iomem **arva,



Similarly to the previous patch, there's no reason why arva should be a 
double-pointer.
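
i.e. something like this for the prototype (sketch; the body only changes by
dropping one level of indirection on arva):

  void pnv_ocxl_tlb_invalidate(void __iomem *arva,
                               unsigned long pid,
                               unsigned long addr);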




+unsigned long pid,
+unsigned long addr)
+{
+   unsigned long timeout = jiffies + (HZ * PNV_OCXL_ATSD_TIMEOUT);
+   uint64_t val = 0ull;
+   int pend;
+
+   if (!(*arva))
+   return;
+
+   if (addr) {
+   /* load Abbreviated Virtual Address register with
+* the necessary value
+*/
+   val |= FIELD_PREP(PNV_OCXL_ATSD_AVA_AVA, addr >> (63-51));
+   out_be64(*arva + PNV_OCXL_ATSD_AVA, val);
+   }
+
+   /* Write access initiates a shoot down to initiate the
+* TLB Invalidate command
+*/
+   val = PNV_OCXL_ATSD_LNCH_R;
+   if (addr) {
+   val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_RIC, 0b00);
+   val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b00);
+   } else {
+   val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_RIC, 0b10);
+   val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b01);
+   val |= PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON;
+   

Re: [PATCH V2 1/5] ocxl: Assign a register set to a Logical Partition

2020-11-23 Thread Frederic Barrat




On 20/11/2020 18:32, Christophe Lombard wrote:

Platform specific function to assign a register set to a Logical Partition.
The "ibm,mmio-atsd" property, provided by the firmware, contains the 16
base ATSD physical addresses (ATSD0 through ATSD15) of the set of MMIO
registers (XTS MMIO ATSDx LPARID/AVA/launch/status register).

For the time being, the ATSD0 set of registers is used by default.

Signed-off-by: Christophe Lombard 
---
  arch/powerpc/include/asm/pnv-ocxl.h   |  3 ++
  arch/powerpc/platforms/powernv/ocxl.c | 48 +++
  2 files changed, 51 insertions(+)

diff --git a/arch/powerpc/include/asm/pnv-ocxl.h 
b/arch/powerpc/include/asm/pnv-ocxl.h
index d37ededca3ee..3f38aed7100c 100644
--- a/arch/powerpc/include/asm/pnv-ocxl.h
+++ b/arch/powerpc/include/asm/pnv-ocxl.h
@@ -28,4 +28,7 @@ int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, 
int PE_mask, void **p
  void pnv_ocxl_spa_release(void *platform_data);
  int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle);

+int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid,
+ uint64_t lpcr, void __iomem **arva);
+void pnv_ocxl_unmap_lpar(void __iomem **arva);
  #endif /* _ASM_PNV_OCXL_H */
diff --git a/arch/powerpc/platforms/powernv/ocxl.c 
b/arch/powerpc/platforms/powernv/ocxl.c
index ecdad219d704..bc20cf867900 100644
--- a/arch/powerpc/platforms/powernv/ocxl.c
+++ b/arch/powerpc/platforms/powernv/ocxl.c
@@ -483,3 +483,51 @@ int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, 
int pe_handle)
return rc;
  }
  EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache);
+
+int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid,
+ uint64_t lpcr, void __iomem **arva)
+{
+   struct pci_controller *hose = pci_bus_to_host(dev->bus);
+   struct pnv_phb *phb = hose->private_data;
+   u64 mmio_atsd;
+   int rc;
+
+   /* ATSD physical address.
+* ATSD LAUNCH register: write access initiates a shoot down to
+* initiate the TLB Invalidate command.
+*/
+   rc = of_property_read_u64_index(hose->dn, "ibm,mmio-atsd",
+   0, &mmio_atsd);
+   if (rc) {
+   dev_info(&dev->dev, "No available ATSD found\n");
+   return rc;
+   }
+
+   /* Assign a register set to a Logical Partition and MMIO ATSD
+* LPARID register to the required value.
+*/
+   if (mmio_atsd)



If we don't have the "ibm,mmio-atsd" property, i.e. on P9, then we've already
exited above. So why not consider a zero mmio_atsd as an error?




+   rc = opal_npu_map_lpar(phb->opal_id, pci_dev_id(dev),
+  lparid, lpcr);
+   if (rc) {
+   dev_err(>dev, "Error mapping device to LPAR: %d\n", rc);
+   return rc;
+   }
+
+   if (mmio_atsd) {



Same here



+   *arva = ioremap(mmio_atsd, 24);
+   if (!(*arva)) {
+   dev_warn(&dev->dev, "ioremap failed - mmio_atsd: 
%#llx\n", mmio_atsd);
+   rc = -ENOMEM;
+   }
+   }
+
+   return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_map_lpar);
+
+void pnv_ocxl_unmap_lpar(void __iomem **arva)



The arva argument doesn't need to be a double pointer. Void * is enough.

  Fred
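
A sketch of what that simplification looks like (the caller, e.g.
release_xsl(), would then clear its own link->arva after the call):

  void pnv_ocxl_unmap_lpar(void __iomem *arva)
  {
          iounmap(arva);
  }
  EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_lpar);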



+{
+   iounmap(*arva);
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_lpar);



[PATCH] powerpc/perf: Fix crash with 'is_sier_available' when pmu is not set

2020-11-23 Thread Athira Rajeev
On systems without any platform specific PMU driver support registered or
Generic Compat PMU support registered, running 'perf record' with
—intr-regs  will crash ( perf record -I  ).

The relevant portion from crash logs and Call Trace:

Unable to handle kernel paging request for data at address 0x0068
Faulting instruction address: 0xc013eb18
Oops: Kernel access of bad area, sig: 11 [#1]
CPU: 2 PID: 13435 Comm: kill Kdump: loaded Not tainted 4.18.0-193.el8.ppc64le #1
NIP:  c013eb18 LR: c0139f2c CTR: c0393d80
REGS: c004a07ab4f0 TRAP: 0300   Not tainted  (4.18.0-193.el8.ppc64le)
NIP [c013eb18] is_sier_available+0x18/0x30
LR [c0139f2c] perf_reg_value+0x6c/0xb0
Call Trace:
[c004a07ab770] [c004a07ab7c8] 0xc004a07ab7c8 (unreliable)
[c004a07ab7a0] [c03aa77c] perf_output_sample+0x60c/0xac0
[c004a07ab840] [c03ab3f0] perf_event_output_forward+0x70/0xb0
[c004a07ab8c0] [c039e208] __perf_event_overflow+0x88/0x1a0
[c004a07ab910] [c039e42c] perf_swevent_hrtimer+0x10c/0x1d0
[c004a07abc50] [c0228b9c] __hrtimer_run_queues+0x17c/0x480
[c004a07abcf0] [c022aaf4] hrtimer_interrupt+0x144/0x520
[c004a07abdd0] [c002a864] timer_interrupt+0x104/0x2f0
[c004a07abe30] [c00091c4] decrementer_common+0x114/0x120

When perf record session started with "-I" option, capture registers
via intr-regs, on each sample ‘is_sier_available()'i is called to check
for the SIER ( Sample Instruction Event Register) availability in the
platform. This function in core-book3s access 'ppmu->flags'. If platform
specific pmu driver is not registered, ppmu is set to null and accessing
its members results in crash. Patch fixes this by returning false in
'is_sier_available()' if 'ppmu' is not set.

Fixes: 333804dc3b7a ("powerpc/perf: Update perf_regs structure to include SIER")
Reported-by: Sachin Sant 
Signed-off-by: Athira Rajeev 
---
 arch/powerpc/perf/core-book3s.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 08643cb..1de4770 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -137,6 +137,9 @@ static void pmao_restore_workaround(bool ebb) { }
 
 bool is_sier_available(void)
 {
+   if (!ppmu)
+   return false;
+
if (ppmu->flags & PPMU_HAS_SIER)
return true;
 
-- 
1.8.3.1



[PATCH v4] dt-bindings: misc: convert fsl,qoriq-mc from txt to YAML

2020-11-23 Thread Laurentiu Tudor
From: Ionut-robert Aron 

Convert fsl,qoriq-mc to YAML in order to automate the verification
process of dts files. In addition, update MAINTAINERS accordingly
and, while at it, add some missing files.

Signed-off-by: Ionut-robert Aron 
[laurentiu.tu...@nxp.com: update MAINTAINERS, updates & fixes in schema]
Signed-off-by: Laurentiu Tudor 
---
Changes in v4:
 - use $ref to point to fsl,qoriq-mc-dpmac binding

Changes in v3:
 - dropped duplicated "fsl,qoriq-mc-dpmac" schema and replaced with
   reference to it
 - fixed a dt_binding_check warning

Changes in v2:
 - fixed errors reported by yamllint
 - dropped multiple unnecessary quotes
 - used schema instead of text in description
 - added constraints on dpmac reg property

 .../devicetree/bindings/misc/fsl,qoriq-mc.txt | 196 --
 .../bindings/misc/fsl,qoriq-mc.yaml   | 186 +
 .../ethernet/freescale/dpaa2/overview.rst |   5 +-
 MAINTAINERS   |   4 +-
 4 files changed, 193 insertions(+), 198 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt
 create mode 100644 Documentation/devicetree/bindings/misc/fsl,qoriq-mc.yaml

diff --git a/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt 
b/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt
deleted file mode 100644
index 7b486d4985dc..
--- a/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt
+++ /dev/null
@@ -1,196 +0,0 @@
-* Freescale Management Complex
-
-The Freescale Management Complex (fsl-mc) is a hardware resource
-manager that manages specialized hardware objects used in
-network-oriented packet processing applications. After the fsl-mc
-block is enabled, pools of hardware resources are available, such as
-queues, buffer pools, I/O interfaces. These resources are building
-blocks that can be used to create functional hardware objects/devices
-such as network interfaces, crypto accelerator instances, L2 switches,
-etc.
-
-For an overview of the DPAA2 architecture and fsl-mc bus see:
-Documentation/networking/device_drivers/ethernet/freescale/dpaa2/overview.rst
-
-As described in the above overview, all DPAA2 objects in a DPRC share the
-same hardware "isolation context" and a 10-bit value called an ICID
-(isolation context id) is expressed by the hardware to identify
-the requester.
-
-The generic 'iommus' property is insufficient to describe the relationship
-between ICIDs and IOMMUs, so an iommu-map property is used to define
-the set of possible ICIDs under a root DPRC and how they map to
-an IOMMU.
-
-For generic IOMMU bindings, see
-Documentation/devicetree/bindings/iommu/iommu.txt.
-
-For arm-smmu binding, see:
-Documentation/devicetree/bindings/iommu/arm,smmu.yaml.
-
-The MSI writes are accompanied by sideband data which is derived from the ICID.
-The msi-map property is used to associate the devices with both the ITS
-controller and the sideband data which accompanies the writes.
-
-For generic MSI bindings, see
-Documentation/devicetree/bindings/interrupt-controller/msi.txt.
-
-For GICv3 and GIC ITS bindings, see:
-Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml.
-
-Required properties:
-
-- compatible
-Value type: 
-Definition: Must be "fsl,qoriq-mc".  A Freescale Management Complex
-compatible with this binding must have Block Revision
-Registers BRR1 and BRR2 at offset 0x0BF8 and 0x0BFC in
-the MC control register region.
-
-- reg
-Value type: 
-Definition: A standard property.  Specifies one or two regions
-defining the MC's registers:
-
-   -the first region is the command portal for the
-this machine and must always be present
-
-   -the second region is the MC control registers. This
-region may not be present in some scenarios, such
-as in the device tree presented to a virtual machine.
-
-- ranges
-Value type: 
-Definition: A standard property.  Defines the mapping between the child
-MC address space and the parent system address space.
-
-The MC address space is defined by 3 components:
- 
-
-Valid values for region type are
-   0x0 - MC portals
-   0x1 - QBMAN portals
-
-- #address-cells
-Value type: 
-Definition: Must be 3.  (see definition in 'ranges' property)
-
-- #size-cells
-Value type: 
-Definition: Must be 1.
-
-Sub-nodes:
-
-The fsl-mc node may optionally have dpmac sub-nodes that describe
-the relationship between the Ethernet MACs which belong to the MC
-and the Ethernet PHYs on the system board.
-
-The dpmac nodes must be under a node named "dpmacs" which contains
-the