Re: [PATCH v2] powerpc/xive: Do not skip CPU-less nodes when creating the IPIs

2021-08-13 Thread Michael Ellerman
On Sat, 7 Aug 2021 09:20:57 +0200, Cédric Le Goater wrote:
> On PowerVM, CPU-less nodes can be populated with hot-plugged CPUs at
> runtime. Today, the IPI is not created for such nodes, and hot-plugged
> CPUs use a bogus IPI, which leads to soft lockups.
> 
> We can not directly allocate and request the IPI on demand because
> bringup_up() is called under the IRQ sparse lock. The alternative is
> to allocate the IPIs for all possible nodes at startup and to request
> the mapping on demand when the first CPU of a node is brought up.

Applied to powerpc/fixes.

[1/1] powerpc/xive: Do not skip CPU-less nodes when creating the IPIs
  https://git.kernel.org/powerpc/c/cbc06f051c524dcfe52ef0d1f30647828e226d30

cheers


Re: [PATCH v2] powerpc/xive: Do not skip CPU-less nodes when creating the IPIs

2021-08-10 Thread Cédric Le Goater
On 8/7/21 9:20 AM, Cédric Le Goater wrote:
> On PowerVM, CPU-less nodes can be populated with hot-plugged CPUs at
> runtime. Today, the IPI is not created for such nodes, and hot-plugged
> CPUs use a bogus IPI, which leads to soft lockups.
> 
> We can not directly allocate and request the IPI on demand because
> bringup_up() is called under the IRQ sparse lock. The alternative is
> to allocate the IPIs for all possible nodes at startup and to request
> the mapping on demand when the first CPU of a node is brought up.
> 
> Fixes: 7dcc37b3eff9 ("powerpc/xive: Map one IPI interrupt per node")
> Cc: sta...@vger.kernel.org # v5.13
> Reported-by: Geetika Moolchandani 
> Cc: Srikar Dronamraju 
> Cc: Laurent Vivier 
> Signed-off-by: Cédric Le Goater 
> Message-Id: <20210629131542.743888-1-...@kaod.org>
> Signed-off-by: Cédric Le Goater 
> ---
>  arch/powerpc/sysdev/xive/common.c | 35 +--
>  1 file changed, 24 insertions(+), 11 deletions(-)

I forgot to add that this version does break irqbalance anymore, since
Linux is not mapping interrupts of CPU-less nodes.

Anyhow, irqbalance is now fixed : 

  
https://github.com/Irqbalance/irqbalance/commit/a7f81483a95a94d6a62ca7fb999a090e01c0de9b

So v1 (plus irqbalance patch above) or v2 are safe to use. I do prefer 
v2.

Thanks to Laurent and Srikar for the extra tests,

C.

> diff --git a/arch/powerpc/sysdev/xive/common.c 
> b/arch/powerpc/sysdev/xive/common.c
> index dbdbbc2f1dc5..943fd30095af 100644
> --- a/arch/powerpc/sysdev/xive/common.c
> +++ b/arch/powerpc/sysdev/xive/common.c
> @@ -67,6 +67,7 @@ static struct irq_domain *xive_irq_domain;
>  static struct xive_ipi_desc {
>   unsigned int irq;
>   char name[16];
> + atomic_t started;
>  } *xive_ipis;
>  
>  /*
> @@ -1120,7 +1121,7 @@ static const struct irq_domain_ops 
> xive_ipi_irq_domain_ops = {
>   .alloc  = xive_ipi_irq_domain_alloc,
>  };
>  
> -static int __init xive_request_ipi(void)
> +static int __init xive_init_ipis(void)
>  {
>   struct fwnode_handle *fwnode;
>   struct irq_domain *ipi_domain;
> @@ -1144,10 +1145,6 @@ static int __init xive_request_ipi(void)
>   struct xive_ipi_desc *xid = _ipis[node];
>   struct xive_ipi_alloc_info info = { node };
>  
> - /* Skip nodes without CPUs */
> - if (cpumask_empty(cpumask_of_node(node)))
> - continue;
> -
>   /*
>* Map one IPI interrupt per node for all cpus of that node.
>* Since the HW interrupt number doesn't have any meaning,
> @@ -1159,11 +1156,6 @@ static int __init xive_request_ipi(void)
>   xid->irq = ret;
>  
>   snprintf(xid->name, sizeof(xid->name), "IPI-%d", node);
> -
> - ret = request_irq(xid->irq, xive_muxed_ipi_action,
> -   IRQF_PERCPU | IRQF_NO_THREAD, xid->name, 
> NULL);
> -
> - WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret);
>   }
>  
>   return ret;
> @@ -1178,6 +1170,22 @@ static int __init xive_request_ipi(void)
>   return ret;
>  }
>  
> +static int __init xive_request_ipi(unsigned int cpu)
> +{
> + struct xive_ipi_desc *xid = _ipis[early_cpu_to_node(cpu)];
> + int ret;
> +
> + if (atomic_inc_return(>started) > 1)
> + return 0;
> +
> + ret = request_irq(xid->irq, xive_muxed_ipi_action,
> +   IRQF_PERCPU | IRQF_NO_THREAD,
> +   xid->name, NULL);
> +
> + WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret);
> + return ret;
> +}
> +
>  static int xive_setup_cpu_ipi(unsigned int cpu)
>  {
>   unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu);
> @@ -1192,6 +1200,9 @@ static int xive_setup_cpu_ipi(unsigned int cpu)
>   if (xc->hw_ipi != XIVE_BAD_IRQ)
>   return 0;
>  
> + /* Register the IPI */
> + xive_request_ipi(cpu);
> +
>   /* Grab an IPI from the backend, this will populate xc->hw_ipi */
>   if (xive_ops->get_ipi(cpu, xc))
>   return -EIO;
> @@ -1231,6 +1242,8 @@ static void xive_cleanup_cpu_ipi(unsigned int cpu, 
> struct xive_cpu *xc)
>   if (xc->hw_ipi == XIVE_BAD_IRQ)
>   return;
>  
> + /* TODO: clear IPI mapping */
> +
>   /* Mask the IPI */
>   xive_do_source_set_mask(>ipi_data, true);
>  
> @@ -1253,7 +1266,7 @@ void __init xive_smp_probe(void)
>   smp_ops->cause_ipi = xive_cause_ipi;
>  
>   /* Register the IPI */
> - xive_request_ipi();
> + xive_init_ipis();
>  
>   /* Allocate and setup IPI for the boot CPU */
>   xive_setup_cpu_ipi(smp_processor_id());
> 



Re: [PATCH v2] powerpc/xive: Do not skip CPU-less nodes when creating the IPIs

2021-08-10 Thread Cédric Le Goater
 
> I forgot to add that this version does break irqbalance anymore, since

... does not break ...

Sorry for the noise,

> Linux is not mapping interrupts of CPU-less nodes.
> 
> Anyhow, irqbalance is now fixed : 
> 
>   
> https://github.com/Irqbalance/irqbalance/commit/a7f81483a95a94d6a62ca7fb999a090e01c0de9b
> 
> So v1 (plus irqbalance patch above) or v2 are safe to use. I do prefer 
> v2.
> 
> Thanks to Laurent and Srikar for the extra tests,
> 
> C.


Re: [PATCH v2] powerpc/xive: Do not skip CPU-less nodes when creating the IPIs

2021-08-10 Thread Srikar Dronamraju
* C?dric Le Goater  [2021-08-07 09:20:57]:

> On PowerVM, CPU-less nodes can be populated with hot-plugged CPUs at
> runtime. Today, the IPI is not created for such nodes, and hot-plugged
> CPUs use a bogus IPI, which leads to soft lockups.
> 
> We can not directly allocate and request the IPI on demand because
> bringup_up() is called under the IRQ sparse lock. The alternative is
> to allocate the IPIs for all possible nodes at startup and to request
> the mapping on demand when the first CPU of a node is brought up.
> 

Thank you, this version too works for me.

Tested-by: Srikar Dronamraju 


> Fixes: 7dcc37b3eff9 ("powerpc/xive: Map one IPI interrupt per node")
> Cc: sta...@vger.kernel.org # v5.13
> Reported-by: Geetika Moolchandani 
> Cc: Srikar Dronamraju 
> Cc: Laurent Vivier 
> Signed-off-by: Cédric Le Goater 
> Message-Id: <20210629131542.743888-1-...@kaod.org>
> Signed-off-by: Cédric Le Goater 
> ---
>  arch/powerpc/sysdev/xive/common.c | 35 +--
>  1 file changed, 24 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/powerpc/sysdev/xive/common.c 
> b/arch/powerpc/sysdev/xive/common.c
> index dbdbbc2f1dc5..943fd30095af 100644
> --- a/arch/powerpc/sysdev/xive/common.c
> +++ b/arch/powerpc/sysdev/xive/common.c
> @@ -67,6 +67,7 @@ static struct irq_domain *xive_irq_domain;
>  static struct xive_ipi_desc {
>   unsigned int irq;
>   char name[16];
> + atomic_t started;
>  } *xive_ipis;
>  
>  /*
> @@ -1120,7 +1121,7 @@ static const struct irq_domain_ops 
> xive_ipi_irq_domain_ops = {
>   .alloc  = xive_ipi_irq_domain_alloc,
>  };
>  
> -static int __init xive_request_ipi(void)
> +static int __init xive_init_ipis(void)
>  {
>   struct fwnode_handle *fwnode;
>   struct irq_domain *ipi_domain;
> @@ -1144,10 +1145,6 @@ static int __init xive_request_ipi(void)
>   struct xive_ipi_desc *xid = _ipis[node];
>   struct xive_ipi_alloc_info info = { node };
>  
> - /* Skip nodes without CPUs */
> - if (cpumask_empty(cpumask_of_node(node)))
> - continue;
> -
>   /*
>* Map one IPI interrupt per node for all cpus of that node.
>* Since the HW interrupt number doesn't have any meaning,
> @@ -1159,11 +1156,6 @@ static int __init xive_request_ipi(void)
>   xid->irq = ret;
>  
>   snprintf(xid->name, sizeof(xid->name), "IPI-%d", node);
> -
> - ret = request_irq(xid->irq, xive_muxed_ipi_action,
> -   IRQF_PERCPU | IRQF_NO_THREAD, xid->name, 
> NULL);
> -
> - WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret);
>   }
>  
>   return ret;
> @@ -1178,6 +1170,22 @@ static int __init xive_request_ipi(void)
>   return ret;
>  }
>  
> +static int __init xive_request_ipi(unsigned int cpu)
> +{
> + struct xive_ipi_desc *xid = _ipis[early_cpu_to_node(cpu)];
> + int ret;
> +
> + if (atomic_inc_return(>started) > 1)
> + return 0;
> +
> + ret = request_irq(xid->irq, xive_muxed_ipi_action,
> +   IRQF_PERCPU | IRQF_NO_THREAD,
> +   xid->name, NULL);
> +
> + WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret);
> + return ret;
> +}
> +
>  static int xive_setup_cpu_ipi(unsigned int cpu)
>  {
>   unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu);
> @@ -1192,6 +1200,9 @@ static int xive_setup_cpu_ipi(unsigned int cpu)
>   if (xc->hw_ipi != XIVE_BAD_IRQ)
>   return 0;
>  
> + /* Register the IPI */
> + xive_request_ipi(cpu);
> +
>   /* Grab an IPI from the backend, this will populate xc->hw_ipi */
>   if (xive_ops->get_ipi(cpu, xc))
>   return -EIO;
> @@ -1231,6 +1242,8 @@ static void xive_cleanup_cpu_ipi(unsigned int cpu, 
> struct xive_cpu *xc)
>   if (xc->hw_ipi == XIVE_BAD_IRQ)
>   return;
>  
> + /* TODO: clear IPI mapping */
> +
>   /* Mask the IPI */
>   xive_do_source_set_mask(>ipi_data, true);
>  
> @@ -1253,7 +1266,7 @@ void __init xive_smp_probe(void)
>   smp_ops->cause_ipi = xive_cause_ipi;
>  
>   /* Register the IPI */
> - xive_request_ipi();
> + xive_init_ipis();
>  
>   /* Allocate and setup IPI for the boot CPU */
>   xive_setup_cpu_ipi(smp_processor_id());
> -- 
> 2.31.1
> 

-- 
Thanks and Regards
Srikar Dronamraju


Re: [PATCH v2] powerpc/xive: Do not skip CPU-less nodes when creating the IPIs

2021-08-10 Thread Laurent Vivier
On 07/08/2021 09:20, Cédric Le Goater wrote:
> On PowerVM, CPU-less nodes can be populated with hot-plugged CPUs at
> runtime. Today, the IPI is not created for such nodes, and hot-plugged
> CPUs use a bogus IPI, which leads to soft lockups.
> 
> We can not directly allocate and request the IPI on demand because
> bringup_up() is called under the IRQ sparse lock. The alternative is
> to allocate the IPIs for all possible nodes at startup and to request
> the mapping on demand when the first CPU of a node is brought up.
> 
> Fixes: 7dcc37b3eff9 ("powerpc/xive: Map one IPI interrupt per node")
> Cc: sta...@vger.kernel.org # v5.13
> Reported-by: Geetika Moolchandani 
> Cc: Srikar Dronamraju 
> Cc: Laurent Vivier 
> Signed-off-by: Cédric Le Goater 
> Message-Id: <20210629131542.743888-1-...@kaod.org>
> Signed-off-by: Cédric Le Goater 
> ---
>  arch/powerpc/sysdev/xive/common.c | 35 +--
>  1 file changed, 24 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/powerpc/sysdev/xive/common.c 
> b/arch/powerpc/sysdev/xive/common.c
> index dbdbbc2f1dc5..943fd30095af 100644
> --- a/arch/powerpc/sysdev/xive/common.c
> +++ b/arch/powerpc/sysdev/xive/common.c
> @@ -67,6 +67,7 @@ static struct irq_domain *xive_irq_domain;
>  static struct xive_ipi_desc {
>   unsigned int irq;
>   char name[16];
> + atomic_t started;
>  } *xive_ipis;
>  
>  /*
> @@ -1120,7 +1121,7 @@ static const struct irq_domain_ops 
> xive_ipi_irq_domain_ops = {
>   .alloc  = xive_ipi_irq_domain_alloc,
>  };
>  
> -static int __init xive_request_ipi(void)
> +static int __init xive_init_ipis(void)
>  {
>   struct fwnode_handle *fwnode;
>   struct irq_domain *ipi_domain;
> @@ -1144,10 +1145,6 @@ static int __init xive_request_ipi(void)
>   struct xive_ipi_desc *xid = _ipis[node];
>   struct xive_ipi_alloc_info info = { node };
>  
> - /* Skip nodes without CPUs */
> - if (cpumask_empty(cpumask_of_node(node)))
> - continue;
> -
>   /*
>* Map one IPI interrupt per node for all cpus of that node.
>* Since the HW interrupt number doesn't have any meaning,
> @@ -1159,11 +1156,6 @@ static int __init xive_request_ipi(void)
>   xid->irq = ret;
>  
>   snprintf(xid->name, sizeof(xid->name), "IPI-%d", node);
> -
> - ret = request_irq(xid->irq, xive_muxed_ipi_action,
> -   IRQF_PERCPU | IRQF_NO_THREAD, xid->name, 
> NULL);
> -
> - WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret);
>   }
>  
>   return ret;
> @@ -1178,6 +1170,22 @@ static int __init xive_request_ipi(void)
>   return ret;
>  }
>  
> +static int __init xive_request_ipi(unsigned int cpu)
> +{
> + struct xive_ipi_desc *xid = _ipis[early_cpu_to_node(cpu)];
> + int ret;
> +
> + if (atomic_inc_return(>started) > 1)
> + return 0;
> +
> + ret = request_irq(xid->irq, xive_muxed_ipi_action,
> +   IRQF_PERCPU | IRQF_NO_THREAD,
> +   xid->name, NULL);
> +
> + WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret);
> + return ret;
> +}
> +
>  static int xive_setup_cpu_ipi(unsigned int cpu)
>  {
>   unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu);
> @@ -1192,6 +1200,9 @@ static int xive_setup_cpu_ipi(unsigned int cpu)
>   if (xc->hw_ipi != XIVE_BAD_IRQ)
>   return 0;
>  
> + /* Register the IPI */
> + xive_request_ipi(cpu);
> +
>   /* Grab an IPI from the backend, this will populate xc->hw_ipi */
>   if (xive_ops->get_ipi(cpu, xc))
>   return -EIO;
> @@ -1231,6 +1242,8 @@ static void xive_cleanup_cpu_ipi(unsigned int cpu, 
> struct xive_cpu *xc)
>   if (xc->hw_ipi == XIVE_BAD_IRQ)
>   return;
>  
> + /* TODO: clear IPI mapping */
> +
>   /* Mask the IPI */
>   xive_do_source_set_mask(>ipi_data, true);
>  
> @@ -1253,7 +1266,7 @@ void __init xive_smp_probe(void)
>   smp_ops->cause_ipi = xive_cause_ipi;
>  
>   /* Register the IPI */
> - xive_request_ipi();
> + xive_init_ipis();
>  
>   /* Allocate and setup IPI for the boot CPU */
>   xive_setup_cpu_ipi(smp_processor_id());
> 

Tested-by: Laurent Vivier 



[PATCH v2] powerpc/xive: Do not skip CPU-less nodes when creating the IPIs

2021-08-07 Thread Cédric Le Goater
On PowerVM, CPU-less nodes can be populated with hot-plugged CPUs at
runtime. Today, the IPI is not created for such nodes, and hot-plugged
CPUs use a bogus IPI, which leads to soft lockups.

We can not directly allocate and request the IPI on demand because
bringup_up() is called under the IRQ sparse lock. The alternative is
to allocate the IPIs for all possible nodes at startup and to request
the mapping on demand when the first CPU of a node is brought up.

Fixes: 7dcc37b3eff9 ("powerpc/xive: Map one IPI interrupt per node")
Cc: sta...@vger.kernel.org # v5.13
Reported-by: Geetika Moolchandani 
Cc: Srikar Dronamraju 
Cc: Laurent Vivier 
Signed-off-by: Cédric Le Goater 
Message-Id: <20210629131542.743888-1-...@kaod.org>
Signed-off-by: Cédric Le Goater 
---
 arch/powerpc/sysdev/xive/common.c | 35 +--
 1 file changed, 24 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/sysdev/xive/common.c 
b/arch/powerpc/sysdev/xive/common.c
index dbdbbc2f1dc5..943fd30095af 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -67,6 +67,7 @@ static struct irq_domain *xive_irq_domain;
 static struct xive_ipi_desc {
unsigned int irq;
char name[16];
+   atomic_t started;
 } *xive_ipis;
 
 /*
@@ -1120,7 +1121,7 @@ static const struct irq_domain_ops 
xive_ipi_irq_domain_ops = {
.alloc  = xive_ipi_irq_domain_alloc,
 };
 
-static int __init xive_request_ipi(void)
+static int __init xive_init_ipis(void)
 {
struct fwnode_handle *fwnode;
struct irq_domain *ipi_domain;
@@ -1144,10 +1145,6 @@ static int __init xive_request_ipi(void)
struct xive_ipi_desc *xid = _ipis[node];
struct xive_ipi_alloc_info info = { node };
 
-   /* Skip nodes without CPUs */
-   if (cpumask_empty(cpumask_of_node(node)))
-   continue;
-
/*
 * Map one IPI interrupt per node for all cpus of that node.
 * Since the HW interrupt number doesn't have any meaning,
@@ -1159,11 +1156,6 @@ static int __init xive_request_ipi(void)
xid->irq = ret;
 
snprintf(xid->name, sizeof(xid->name), "IPI-%d", node);
-
-   ret = request_irq(xid->irq, xive_muxed_ipi_action,
- IRQF_PERCPU | IRQF_NO_THREAD, xid->name, 
NULL);
-
-   WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret);
}
 
return ret;
@@ -1178,6 +1170,22 @@ static int __init xive_request_ipi(void)
return ret;
 }
 
+static int __init xive_request_ipi(unsigned int cpu)
+{
+   struct xive_ipi_desc *xid = _ipis[early_cpu_to_node(cpu)];
+   int ret;
+
+   if (atomic_inc_return(>started) > 1)
+   return 0;
+
+   ret = request_irq(xid->irq, xive_muxed_ipi_action,
+ IRQF_PERCPU | IRQF_NO_THREAD,
+ xid->name, NULL);
+
+   WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret);
+   return ret;
+}
+
 static int xive_setup_cpu_ipi(unsigned int cpu)
 {
unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu);
@@ -1192,6 +1200,9 @@ static int xive_setup_cpu_ipi(unsigned int cpu)
if (xc->hw_ipi != XIVE_BAD_IRQ)
return 0;
 
+   /* Register the IPI */
+   xive_request_ipi(cpu);
+
/* Grab an IPI from the backend, this will populate xc->hw_ipi */
if (xive_ops->get_ipi(cpu, xc))
return -EIO;
@@ -1231,6 +1242,8 @@ static void xive_cleanup_cpu_ipi(unsigned int cpu, struct 
xive_cpu *xc)
if (xc->hw_ipi == XIVE_BAD_IRQ)
return;
 
+   /* TODO: clear IPI mapping */
+
/* Mask the IPI */
xive_do_source_set_mask(>ipi_data, true);
 
@@ -1253,7 +1266,7 @@ void __init xive_smp_probe(void)
smp_ops->cause_ipi = xive_cause_ipi;
 
/* Register the IPI */
-   xive_request_ipi();
+   xive_init_ipis();
 
/* Allocate and setup IPI for the boot CPU */
xive_setup_cpu_ipi(smp_processor_id());
-- 
2.31.1