Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru>
---
Changes:
v2:
* added ppc_md.orphan_irq
---
Found on a POWER9 (P9) system with:
- a host with 8 CPUs online
- a boot disk on AHCI with its MSI-X on cpu#0
- a guest with 2xGPUs + 6xNVLink + 4 CPUs
- GPU#0 from the guest bound to the same cpu#0.
Killing the guest killed AHCI, and therefore the host, because of the race.
Note that VFIO masks interrupts first and only then resets the device.
---
arch/powerpc/include/asm/machdep.h | 3 +++
arch/powerpc/kernel/irq.c | 9 ++++++---
arch/powerpc/sysdev/xive/common.c | 10 ++++++++++
3 files changed, 19 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index c43d6eca9edd..6cc14e28e89a 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -59,6 +59,9 @@ struct machdep_calls {
/* Return an irq, or 0 to indicate there are none pending. */
unsigned int (*get_irq)(void);
+ /* Drops irq if it does not have a valid descriptor */
+ void (*orphan_irq)(unsigned int irq);
+
/* PCI stuff */
/* Called after allocating resources */
void (*pcibios_fixup)(void);
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index bc68c53af67c..b4e06d05bdba 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -632,10 +632,13 @@ void __do_irq(struct pt_regs *regs)
may_hard_irq_enable();
/* And finally process it */
- if (unlikely(!irq))
+ if (unlikely(!irq)) {
__this_cpu_inc(irq_stat.spurious_irqs);
- else
- generic_handle_irq(irq);
+ } else if (generic_handle_irq(irq)) {
+ if (ppc_md.orphan_irq)
+ ppc_md.orphan_irq(irq);
+ __this_cpu_inc(irq_stat.spurious_irqs);
+ }
trace_irq_exit(regs);
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 082c7e1c20f0..b4054091999a 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -283,6 +283,15 @@ static unsigned int xive_get_irq(void)
return irq;
}
+static void xive_orphan_irq(unsigned int irq)
+{
+ struct xive_cpu *xc = __this_cpu_read(xive_cpu);
+
+ xc->cppr = 0xff;
+ out_8(xive_tima + xive_tima_offset + TM_CPPR, 0xff);
+ DBG_VERBOSE("orphan_irq: irq %d, adjusting CPPR to 0xff\n", irq);
+}
+
/*
* After EOI'ing an interrupt, we need to re-check the queue
* to see if another interrupt is pending since multiple
@@ -1419,6 +1428,7 @@ bool __init xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 o
xive_irq_priority = max_prio;
ppc_md.get_irq = xive_get_irq;
+ ppc_md.orphan_irq = xive_orphan_irq;
__xive_enabled = true;
pr_devel("Initializing host..\n");