On pseries LPAR systems, watchdog timers configured from userspace
can remain active after a kernel panic. During a panic-triggered crash
dump capture, the crashing kernel jumps directly to the kdump kernel
without shutting down userspace services. As a result, active
watchdogs are not stopped before entering the kdump kernel.
If dump capture takes longer than the watchdog timeout, PHYP resets
the LPAR before dump collection completes, resulting in dump capture
failure.
Fix this by issuing the H_WATCHDOG hcall on the crash shutdown path
to stop all active watchdogs before booting the kdump kernel.
Fixes: 69472ffa6575 ("watchdog/pseries-wdt: initial support for H_WATCHDOG-based watchdog timers")
Reported-by: Mahesh Kumar G <[email protected]>
Signed-off-by: Sourabh Jain <[email protected]>
---
arch/powerpc/kexec/crash.c | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)
diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c
index e6539f213b3d..5651523e3a70 100644
--- a/arch/powerpc/kexec/crash.c
+++ b/arch/powerpc/kexec/crash.c
@@ -28,6 +28,7 @@
#include <asm/interrupt.h>
#include <asm/kexec_ranges.h>
#include <asm/crashdump-ppc64.h>
+#include <asm/hvcall.h>
/*
* The primary CPU waits a while for all secondary CPUs to enter. This is to
@@ -352,6 +353,28 @@ int crash_shutdown_unregister(crash_shutdown_t handler)
}
EXPORT_SYMBOL(crash_shutdown_unregister);
+/**
+ * stop_watchdogs() - Stop active watchdogs before entering the kdump kernel
+ *
+ * Watchdogs armed from userspace stay active after a panic because the
+ * crash path does not shut down userspace services. If one expires while
+ * the kdump kernel is collecting the dump, PHYP resets the LPAR and dump
+ * capture fails. H_WATCHDOG is issued with flags 0x200UL (stop operation)
+ * and watchdog number -1 (all watchdogs). NOTE(review): the PPC_PSERIES
+ * check assumes FW_FEATURE_LPAR can be set on non-pseries builds - confirm.
+ */
+static void stop_watchdogs(void)
+{
+	long rc;	/* hcall status; kept as long so it is not narrowed */
+
+	if (!IS_ENABLED(CONFIG_PPC_PSERIES) || !firmware_has_feature(FW_FEATURE_LPAR))
+		return;
+
+	rc = plpar_hcall_norets_notrace(H_WATCHDOG, 0x200UL, -1UL);
+	if (rc != H_SUCCESS && rc != H_NOOP)
+		pr_warn("crash: failed to stop watchdogs: %ld\n", rc);
+}
+
void default_machine_crash_shutdown(struct pt_regs *regs)
{
volatile unsigned int i;
@@ -360,6 +383,8 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
if (TRAP(regs) == INTERRUPT_SYSTEM_RESET)
is_via_system_reset = 1;
+ stop_watchdogs();
+
if (IS_ENABLED(CONFIG_SMP))
crash_smp_send_stop();
else
--
2.52.0