[tip:core/urgent] kernel/watchdog: Prevent false positives with turbo modes

2017-08-18 Thread tip-bot for Thomas Gleixner
Commit-ID:  7edaeb6841dfb27e362288ab8466ebdc4972e867
Gitweb: http://git.kernel.org/tip/7edaeb6841dfb27e362288ab8466ebdc4972e867
Author: Thomas Gleixner 
AuthorDate: Tue, 15 Aug 2017 09:50:13 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 18 Aug 2017 12:35:02 +0200

kernel/watchdog: Prevent false positives with turbo modes

The hardlockup detector on x86 uses a performance counter based on unhalted
CPU cycles and a periodic hrtimer. The hrtimer period is about 2/5 of the
performance counter period, so the hrtimer should fire 2-3 times before the
performance counter NMI fires. The NMI code checks whether the hrtimer
fired since the last invocation. If not, it assumes a hard lockup.

The calculation of those periods is based on the nominal CPU
frequency. Turbo modes increase the CPU clock frequency and therefore
shorten the period of the perf/NMI watchdog. With extreme Turbo-modes (3x
nominal frequency) the perf/NMI period is shorter than the hrtimer period
which leads to false positives.

A simple fix would be to shorten the hrtimer period, but that comes with
the side effect of more frequent hrtimer and softlockup thread wakeups,
which is not desired.

Implement a low pass filter, which checks the perf/NMI period against
kernel time. If the perf/NMI fires before 4/5 of the watchdog period has
elapsed then the event is ignored and postponed to the next perf/NMI.

That solves the problem and avoids the overhead of shorter hrtimer periods
and more frequent softlockup thread wakeups.

Fixes: 58687acba592 ("lockup_detector: Combine nmi_watchdog and softlockup detector")
Reported-and-tested-by: Kan Liang 
Signed-off-by: Thomas Gleixner 
Cc: dzic...@redhat.com
Cc: pra...@redhat.com
Cc: a...@linux.intel.com
Cc: babu.mo...@oracle.com
Cc: pet...@infradead.org
Cc: eran...@google.com
Cc: a...@redhat.com
Cc: sta...@vger.kernel.org
Cc: atom...@redhat.com
Cc: a...@linux-foundation.org
Cc: torva...@linux-foundation.org
Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1708150931310.1886@nanos
---
 arch/x86/Kconfig  |  1 +
 include/linux/nmi.h   |  8 +++
 kernel/watchdog.c |  1 +
 kernel/watchdog_hld.c | 59 +++
 lib/Kconfig.debug |  7 ++
 5 files changed, 76 insertions(+)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 781521b..9101bfc 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -100,6 +100,7 @@ config X86
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
select GENERIC_TIME_VSYSCALL
+   select HARDLOCKUP_CHECK_TIMESTAMP   if X86_64
select HAVE_ACPI_APEI   if ACPI
select HAVE_ACPI_APEI_NMI   if ACPI
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 8aa01fd..a36abe2 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -168,6 +168,14 @@ extern int sysctl_hardlockup_all_cpu_backtrace;
 #define sysctl_softlockup_all_cpu_backtrace 0
 #define sysctl_hardlockup_all_cpu_backtrace 0
 #endif
+
+#if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \
+defined(CONFIG_HARDLOCKUP_DETECTOR)
+void watchdog_update_hrtimer_threshold(u64 period);
+#else
+static inline void watchdog_update_hrtimer_threshold(u64 period) { }
+#endif
+
 extern bool is_hardlockup(void);
 struct ctl_table;
 extern int proc_watchdog(struct ctl_table *, int ,
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 06d3389..f5d5202 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -240,6 +240,7 @@ static void set_sample_period(void)
 * hardlockup detector generates a warning
 */
sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
+   watchdog_update_hrtimer_threshold(sample_period);
 }
 
 /* Commands for resetting the watchdog */
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index 295a0d8..3a09ea1 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -37,6 +37,62 @@ void arch_touch_nmi_watchdog(void)
 }
 EXPORT_SYMBOL(arch_touch_nmi_watchdog);
 
+#ifdef CONFIG_HARDLOCKUP_CHECK_TIMESTAMP
+static DEFINE_PER_CPU(ktime_t, last_timestamp);
+static DEFINE_PER_CPU(unsigned int, nmi_rearmed);
+static ktime_t watchdog_hrtimer_sample_threshold __read_mostly;
+
+void watchdog_update_hrtimer_threshold(u64 period)
+{
+   /*
+* The hrtimer runs with a period of (watchdog_threshold * 2) / 5
+*
+* So it runs effectively with 2.5 times the rate of the NMI
+* watchdog. That means the hrtimer should fire 2-3 times before
+* the NMI watchdog expires. The NMI watchdog on x86 is based on
+* unhalted CPU cycles, so if Turbo-Mode is enabled the CPU cycles
+* might run way faster than expected and the NMI fires in a
+* smaller period than the one deduced from the nominal CPU
+* frequency. Depending on the Turbo-Mode factor this might be fast
+* enough to get the NMI period smaller than the hrtimer watchdog
+* period and trigger false positives.

[tip:core/urgent] kernel/watchdog: Prevent false positives with turbo modes

2017-08-18 Thread tip-bot for Thomas Gleixner
Commit-ID:  7edaeb6841dfb27e362288ab8466ebdc4972e867
Gitweb: http://git.kernel.org/tip/7edaeb6841dfb27e362288ab8466ebdc4972e867
Author: Thomas Gleixner 
AuthorDate: Tue, 15 Aug 2017 09:50:13 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 18 Aug 2017 12:35:02 +0200

kernel/watchdog: Prevent false positives with turbo modes

The hardlockup detector on x86 uses a performance counter based on unhalted
CPU cycles and a periodic hrtimer. The hrtimer period is about 2/5 of the
performance counter period, so the hrtimer should fire 2-3 times before the
performance counter NMI fires. The NMI code checks whether the hrtimer
fired since the last invocation. If not, it assumes a hard lockup.

The calculation of those periods is based on the nominal CPU
frequency. Turbo modes increase the CPU clock frequency and therefore
shorten the period of the perf/NMI watchdog. With extreme Turbo-modes (3x
nominal frequency) the perf/NMI period is shorter than the hrtimer period
which leads to false positives.

A simple fix would be to shorten the hrtimer period, but that comes with
the side effect of more frequent hrtimer and softlockup thread wakeups,
which is not desired.

Implement a low pass filter, which checks the perf/NMI period against
kernel time. If the perf/NMI fires before 4/5 of the watchdog period has
elapsed then the event is ignored and postponed to the next perf/NMI.

That solves the problem and avoids the overhead of shorter hrtimer periods
and more frequent softlockup thread wakeups.

Fixes: 58687acba592 ("lockup_detector: Combine nmi_watchdog and softlockup detector")
Reported-and-tested-by: Kan Liang 
Signed-off-by: Thomas Gleixner 
Cc: dzic...@redhat.com
Cc: pra...@redhat.com
Cc: a...@linux.intel.com
Cc: babu.mo...@oracle.com
Cc: pet...@infradead.org
Cc: eran...@google.com
Cc: a...@redhat.com
Cc: sta...@vger.kernel.org
Cc: atom...@redhat.com
Cc: a...@linux-foundation.org
Cc: torva...@linux-foundation.org
Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1708150931310.1886@nanos
---
 arch/x86/Kconfig  |  1 +
 include/linux/nmi.h   |  8 +++
 kernel/watchdog.c |  1 +
 kernel/watchdog_hld.c | 59 +++
 lib/Kconfig.debug |  7 ++
 5 files changed, 76 insertions(+)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 781521b..9101bfc 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -100,6 +100,7 @@ config X86
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
select GENERIC_TIME_VSYSCALL
+   select HARDLOCKUP_CHECK_TIMESTAMP   if X86_64
select HAVE_ACPI_APEI   if ACPI
select HAVE_ACPI_APEI_NMI   if ACPI
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 8aa01fd..a36abe2 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -168,6 +168,14 @@ extern int sysctl_hardlockup_all_cpu_backtrace;
 #define sysctl_softlockup_all_cpu_backtrace 0
 #define sysctl_hardlockup_all_cpu_backtrace 0
 #endif
+
+#if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \
+defined(CONFIG_HARDLOCKUP_DETECTOR)
+void watchdog_update_hrtimer_threshold(u64 period);
+#else
+static inline void watchdog_update_hrtimer_threshold(u64 period) { }
+#endif
+
 extern bool is_hardlockup(void);
 struct ctl_table;
 extern int proc_watchdog(struct ctl_table *, int ,
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 06d3389..f5d5202 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -240,6 +240,7 @@ static void set_sample_period(void)
 * hardlockup detector generates a warning
 */
sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
+   watchdog_update_hrtimer_threshold(sample_period);
 }
 
 /* Commands for resetting the watchdog */
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index 295a0d8..3a09ea1 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -37,6 +37,62 @@ void arch_touch_nmi_watchdog(void)
 }
 EXPORT_SYMBOL(arch_touch_nmi_watchdog);
 
+#ifdef CONFIG_HARDLOCKUP_CHECK_TIMESTAMP
+static DEFINE_PER_CPU(ktime_t, last_timestamp);
+static DEFINE_PER_CPU(unsigned int, nmi_rearmed);
+static ktime_t watchdog_hrtimer_sample_threshold __read_mostly;
+
+void watchdog_update_hrtimer_threshold(u64 period)
+{
+   /*
+* The hrtimer runs with a period of (watchdog_threshold * 2) / 5
+*
+* So it runs effectively with 2.5 times the rate of the NMI
+* watchdog. That means the hrtimer should fire 2-3 times before
+* the NMI watchdog expires. The NMI watchdog on x86 is based on
+* unhalted CPU cycles, so if Turbo-Mode is enabled the CPU cycles
+* might run way faster than expected and the NMI fires in a
+* smaller period than the one deduced from the nominal CPU
+* frequency. Depending on the Turbo-Mode factor this might be fast
+* enough to get the NMI period smaller than the hrtimer watchdog
+* period and trigger false positives.