On 9/3/18 11:33 AM, Peter Zijlstra wrote:
On Mon, Sep 03, 2018 at 10:54:23AM +0200, Peter Zijlstra wrote:
On Mon, Sep 03, 2018 at 09:38:15AM +0200, Thomas Gleixner wrote:
On Mon, 3 Sep 2018, Peter Zijlstra wrote:
On Sat, Sep 01, 2018 at 11:51:26AM +0930, Kevin Shanahan wrote:
commit 01548f4d3e8e94caf323a4f664eb347fd34a34ab
Author: Martin Schwidefsky <schwidef...@de.ibm.com>
Date:   Tue Aug 18 17:09:42 2009 +0200

     clocksource: Avoid clocksource watchdog circular locking dependency

     stop_machine from a multithreaded workqueue is not allowed because
     of a circular locking dependency between cpu_down and the workqueue
     execution. Use a kernel thread to do the clocksource downgrade.

I cannot find any stop_machine usage there; either it went away or I need
to wake up.

timekeeping_notify(), which is involved in switching clock sources, uses
stop_machine().
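
[ For reference, the chain Thomas points at ends in stop_machine(): the
clocksource switch runs with all other CPUs stopped. Roughly, in
kernel/time/timekeeping.c around v4.18 the notifier looks like the sketch
below (paraphrased from memory, details may differ): ]

int timekeeping_notify(struct clocksource *clock)
{
        struct timekeeper *tk = &tk_core.timekeeper;

        if (tk->tkr_mono.clock == clock)
                return 0;
        /* install the new clocksource with the whole machine stopped */
        stop_machine(change_clocksource, clock, NULL);
        tick_clock_notify();
        return tk->tkr_mono.clock == clock ? 0 : -ETIMEDOUT;
}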

ARGH... OK, lemme see if I can come up with something other than
endlessly spawning that kthread.
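
[ For context, "endlessly spawning that kthread" refers to the scheme the
2009 commit quoted above introduced: the watchdog work item only forks a
short-lived kernel thread, and that thread performs the downgrade, so
stop_machine() is never entered from workqueue context. A rough sketch of
that shape (names approximate, not a verbatim quote of the old
kernel/time/clocksource.c): ]

static int clocksource_watchdog_kthread(void *data)
{
        /* runs in its own, freshly created kernel thread */
        mutex_lock(&clocksource_mutex);
        if (__clocksource_watchdog_kthread())
                clocksource_select();   /* may end up in stop_machine() */
        mutex_unlock(&clocksource_mutex);
        return 0;
}

static void clocksource_watchdog_work(struct work_struct *work)
{
        /*
         * If kthread_run() fails, the next watchdog scan will find the
         * unstable clocksource again and retry.
         */
        kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog");
}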

A special purpose kthread_worker would make more sense than that.
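
[ For readers less familiar with the API: a kthread_worker is a single
dedicated kernel thread that processes kthread_work items queued to it, so
nothing queued there ever runs on the shared workqueue pool. A minimal,
generic usage sketch (the names here are made up and not part of the
patch): ]

#include <linux/err.h>
#include <linux/kthread.h>

static void my_work_fn(struct kthread_work *work)
{
        /* runs in the dedicated "my-worker" thread, not in a workqueue */
}

static DEFINE_KTHREAD_WORK(my_work, my_work_fn);
static struct kthread_worker *my_worker;

static int __init my_setup(void)
{
        my_worker = kthread_create_worker(0, "my-worker");
        if (IS_ERR(my_worker))
                return PTR_ERR(my_worker);
        return 0;
}

/* later, e.g. from a timer callback: */
static void my_kick(void)
{
        kthread_queue_work(my_worker, &my_work);
}

[ The patch below applies that pattern to the clocksource watchdog and
creates the worker once in clocksource_done_booting(). ]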

Can someone test this?

---
  kernel/time/clocksource.c | 28 ++++++++++++++++++++++------
  1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index f74fb00d8064..898976d0082a 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -112,13 +112,28 @@ static int finished_booting;
  static u64 suspend_start;
#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
-static void clocksource_watchdog_work(struct work_struct *work);
+static void clocksource_watchdog_work(struct kthread_work *work);
  static void clocksource_select(void);
static LIST_HEAD(watchdog_list);
  static struct clocksource *watchdog;
  static struct timer_list watchdog_timer;
-static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
+
+/*
+ * We must use a kthread_worker here, because:
+ *
+ *   clocksource_watchdog_work()
+ *     clocksource_select()
+ *       __clocksource_select()
+ *         timekeeping_notify()
+ *           stop_machine()
+ *
+ * cannot be called from a regular workqueue, because of deadlocks between
+ * the workqueue and stop_machine.
+ */
+static struct kthread_worker *watchdog_worker;
+static DEFINE_KTHREAD_WORK(watchdog_work, clocksource_watchdog_work);
+
  static DEFINE_SPINLOCK(watchdog_lock);
  static int watchdog_running;
  static atomic_t watchdog_reset_pending;
@@ -158,7 +173,7 @@ static void __clocksource_unstable(struct clocksource *cs)
/* kick clocksource_watchdog_work() */
        if (finished_booting)
-               schedule_work(&watchdog_work);
+               kthread_queue_work(watchdog_worker, &watchdog_work);
  }
/**
@@ -199,7 +214,7 @@ static void clocksource_watchdog(struct timer_list *unused)
                /* Clocksource already marked unstable? */
                if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
                        if (finished_booting)
-                               schedule_work(&watchdog_work);
+                               kthread_queue_work(watchdog_worker, &watchdog_work);
                        continue;
                }
@@ -269,7 +284,7 @@ static void clocksource_watchdog(struct timer_list *unused)
                         */
                        if (cs != curr_clocksource) {
                                cs->flags |= CLOCK_SOURCE_RESELECT;
-                               schedule_work(&watchdog_work);
+                               kthread_queue_work(watchdog_worker, &watchdog_work);
                        } else {
                                tick_clock_notify();
                        }
@@ -418,7 +433,7 @@ static int __clocksource_watchdog_work(void)
        return select;
  }
-static void clocksource_watchdog_work(struct work_struct *work)
+static void clocksource_watchdog_work(struct kthread_work *work)
  {
        mutex_lock(&clocksource_mutex);
        if (__clocksource_watchdog_work())
@@ -806,6 +821,7 @@ static int __init clocksource_done_booting(void)
  {
        mutex_lock(&clocksource_mutex);
        curr_clocksource = clocksource_default_clock();
+       watchdog_worker = kthread_create_worker(0, "cs-watchdog");
        finished_booting = 1;
        /*
         * Run the watchdog first to eliminate unstable clock sources


Successfully booted my Intel Core 2 Duo with the patch applied on top of
4.18.5 (based on default Arch Linux config).

I did at least 8 boots in total, all of them successful - with no
additional kernel boot parameters and also with "quiet" and "debug".
No problems seen so far.

Thank you for your effort in developing this patch.


Siegfried
