Instead of periodically resetting the watchdog from thread context,
this patch simply forces a reschedule from the watchdog timer and
checks rq->sched_count: the watchdog is touched if the sched count has
increased since the last check. If pick_next_task picks the same thread
during the reschedule there is no context switch, but __schedule()
still increments the count.

With the new method we lose coverage of two steps: whether a
migration/n thread actually gets picked and whether we actually context
switch to it. These steps are unlikely to fail silently, so the change
should provide nearly the same level of protection with less latency
and jitter.

Suggested-by: Paul Turner <p...@google.com>
Suggested-by: Peter Zijlstra <pet...@infradead.org>
Signed-off-by: Xi Wang <x...@google.com>
---
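For illustration only, a rough user-space sketch of the progress check
the timer now performs; all names below (fake_schedule, tick,
threshold_ticks) are invented for the example and are not kernel APIs.
Only the "counter advanced since the last sample" idea mirrors the
kernel change.

/*
 * User-space analogue of the new check in watchdog_timer_fn(): rather
 * than having a helper thread touch a timestamp, the periodic tick
 * compares a schedule counter against the value sampled at the last
 * touch.
 */
#include <stdbool.h>
#include <stdio.h>

static unsigned int sched_count;        /* bumped by every "__schedule()" */
static unsigned int last_sched_count;   /* value sampled at the last touch */
static unsigned int ticks_since_touch;  /* periods without forward progress */
static const unsigned int threshold_ticks = 5;

/* models __schedule() running, even when the same task is re-picked */
static void fake_schedule(void)
{
        sched_count++;
}

/* models the hrtimer callback; returns true when a "softlockup" is flagged */
static bool tick(void)
{
        /*
         * The kernel also sets need_resched here (set_tsk_need_resched() /
         * set_preempt_need_resched()); this sketch only shows the check.
         */
        if (sched_count - last_sched_count) {
                /* the scheduler ran since the last tick: treat as a touch */
                last_sched_count = sched_count;
                ticks_since_touch = 0;
                return false;
        }
        return ++ticks_since_touch >= threshold_ticks;
}

int main(void)
{
        for (int i = 0; i < 10; i++) {
                if (i < 3)
                        fake_schedule();        /* early forward progress */
                printf("tick %d: %s\n", i,
                       tick() ? "soft lockup suspected" : "ok");
        }
        return 0;
}

The unsigned subtraction stays valid across counter wraparound, which
is presumably why the check tests the difference for non-zero instead
of comparing the two values directly.
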
 include/linux/sched.h |  4 ++++
 kernel/sched/core.c   | 23 +++++++++++++++++++--
 kernel/sched/sched.h  |  6 +++++-
 kernel/watchdog.c     | 47 +++++++++++++------------------------------
 4 files changed, 44 insertions(+), 36 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d383cf09e78f..1e3bef9a9cdb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1662,6 +1662,10 @@ extern int sched_setattr(struct task_struct *, const struct sched_attr *);
 extern int sched_setattr_nocheck(struct task_struct *, const struct sched_attr *);
 extern struct task_struct *idle_task(int cpu);
 
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
+extern unsigned int sched_get_count(int cpu);
+#endif
+
 /**
  * is_idle_task - is the specified task an idle task?
  * @p: the task in question.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8160ab5263f8..378f0f36c402 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4293,8 +4293,6 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt)
        rcu_sleep_check();
 
        profile_hit(SCHED_PROFILING, __builtin_return_address(0));
-
-       schedstat_inc(this_rq()->sched_count);
 }
 
 static void put_prev_task_balance(struct rq *rq, struct task_struct *prev,
@@ -4492,6 +4490,12 @@ static void __sched notrace __schedule(bool preempt)
        clear_tsk_need_resched(prev);
        clear_preempt_need_resched();
 
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
+       this_rq()->sched_count++; /* sched count is also used by watchdog */
+#else
+       schedstat_inc(this_rq()->sched_count);
+#endif
+
        if (likely(prev != next)) {
                rq->nr_switches++;
                /*
@@ -5117,6 +5121,21 @@ struct task_struct *idle_task(int cpu)
        return cpu_rq(cpu)->idle;
 }
 
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
+
+/**
+ * sched_get_count - get the sched count of a CPU.
+ * @cpu: the CPU in question.
+ *
+ * Return: sched count.
+ */
+unsigned int sched_get_count(int cpu)
+{
+       return cpu_rq(cpu)->sched_count;
+}
+
+#endif
+
 /**
  * find_process_by_pid - find a process with a matching PID value.
  * @pid: the pid in question.
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 28709f6b0975..f23255981d52 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -959,6 +959,11 @@ struct rq {
        u64                     clock_pelt;
        unsigned long           lost_idle_time;
 
+#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_SOFTLOCKUP_DETECTOR)
+       /* Also used by the watchdog - no longer grouped with the other sched stats */
+       unsigned int            sched_count;
+#endif
+
        atomic_t                nr_iowait;
 
 #ifdef CONFIG_MEMBARRIER
@@ -1036,7 +1041,6 @@ struct rq {
        unsigned int            yld_count;
 
        /* schedule() stats */
-       unsigned int            sched_count;
        unsigned int            sched_goidle;
 
        /* try_to_wake_up() stats */
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 5abb5b22ad13..22f87aded95a 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -170,6 +170,7 @@ static bool softlockup_initialized __read_mostly;
 static u64 __read_mostly sample_period;
 
 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
+static DEFINE_PER_CPU(unsigned int, watchdog_sched_prev);
 static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
 static DEFINE_PER_CPU(bool, softlockup_touch_sync);
 static DEFINE_PER_CPU(bool, soft_watchdog_warn);
@@ -239,6 +240,7 @@ static void set_sample_period(void)
 static void __touch_watchdog(void)
 {
        __this_cpu_write(watchdog_touch_ts, get_timestamp());
+       __this_cpu_write(watchdog_sched_prev, sched_get_count(smp_processor_id()));
 }
 
 /**
@@ -318,25 +320,6 @@ static void watchdog_interrupt_count(void)
        __this_cpu_inc(hrtimer_interrupts);
 }
 
-static DEFINE_PER_CPU(struct completion, softlockup_completion);
-static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work);
-
-/*
- * The watchdog thread function - touches the timestamp.
- *
- * It only runs once every sample_period seconds (4 seconds by
- * default) to reset the softlockup timestamp. If this gets delayed
- * for more than 2*watchdog_thresh seconds then the debug-printout
- * triggers in watchdog_timer_fn().
- */
-static int softlockup_fn(void *data)
-{
-       __touch_watchdog();
-       complete(this_cpu_ptr(&softlockup_completion));
-
-       return 0;
-}
-
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 {
@@ -351,15 +334,6 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
        /* kick the hardlockup detector */
        watchdog_interrupt_count();
 
-       /* kick the softlockup detector */
-       if (completion_done(this_cpu_ptr(&softlockup_completion))) {
-               reinit_completion(this_cpu_ptr(&softlockup_completion));
-               stop_one_cpu_nowait(smp_processor_id(),
-                               softlockup_fn, NULL,
-                               this_cpu_ptr(&softlockup_stop_work));
-       }
-
-       /* .. and repeat */
        hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
 
        if (touch_ts == SOFTLOCKUP_RESET) {
@@ -378,6 +352,18 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
                return HRTIMER_RESTART;
        }
 
+       /* Trigger reschedule for the next round */
+       set_tsk_need_resched(current);
+       set_preempt_need_resched();
+
+       /* a sched_count increase in __schedule() is taken as a watchdog touch */
+       if (sched_get_count(smp_processor_id()) -
+           __this_cpu_read(watchdog_sched_prev)) {
+               __touch_watchdog();
+               __this_cpu_write(soft_watchdog_warn, false);
+               return HRTIMER_RESTART;
+       }
+
        /* check for a softlockup
         * This is done by making sure a high priority task is
         * being scheduled.  The task touches the watchdog to
@@ -443,13 +429,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 static void watchdog_enable(unsigned int cpu)
 {
        struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
-       struct completion *done = this_cpu_ptr(&softlockup_completion);
 
        WARN_ON_ONCE(cpu != smp_processor_id());
 
-       init_completion(done);
-       complete(done);
-
        /*
         * Start the timer first to prevent the NMI watchdog triggering
         * before the timer has a chance to fire.
@@ -479,7 +461,6 @@ static void watchdog_disable(unsigned int cpu)
         */
        watchdog_nmi_disable(cpu);
        hrtimer_cancel(hrtimer);
-       wait_for_completion(this_cpu_ptr(&softlockup_completion));
 }
 
 static int softlockup_stop_fn(void *data)
-- 
2.29.0.rc2.309.g374f81d7ae-goog
