Konstantin Baydarov (on Mon, 9 Jul 2007 19:39:16 +0400) wrote:
>When I spend more than 10 seconds in the KDB console and then exit from KDB,
>the kernel thinks that the current clocksource is unstable and changes it.
>I'm using 2.6.22-rc7 kdb on an SMP i386 system. Here is the log.
>Before running sync, I set a breakpoint on do_sync().
>[EMAIL PROTECTED]:~#
>[EMAIL PROTECTED]:~# cat 
>/sys/devices/system/clocksource/clocksource0/current_clocksource
>tsc
>[EMAIL PROTECTED]:~# sync
>Instruction(i) breakpoint #0 at 0xc017b64a (adjusted)
>0xc017b64a do_sync:         int3
>
>Entering kdb (current=0xc16f3a50, pid 2983) on processor 0 due to Breakpoint @ 
>0xc017b64a
>[0]kdb> go
>Clocksource tsc unstable (delta = 14060902198 ns)
>[EMAIL PROTECTED]:~# Time: acpi_pm clocksource has been installed.
>
>[EMAIL PROTECTED]:~#
>[EMAIL PROTECTED]:~# cat 
>/sys/devices/system/clocksource/clocksource0/current_clocksource
>acpi_pm
>[EMAIL PROTECTED]:~#
>[EMAIL PROTECTED]:~#
>
>Issue: tsc clocksource was replaced by acpi_pm.
>
>The reason for the issue:
>The current clocksource (tsc) in the kernel has a watchdog - another
>clocksource (acpi_pm). clocksource_watchdog(), which updates the
>watchdog_last timestamp, is run by a kernel timer that is disabled while the
>kernel is in kdb. So the watchdog clocksource (acpi_pm) can overflow, and when
>the kernel exits kdb, the watchdog clocksource can report a wrong time delta -
>that's why the kernel can think that the current clocksource is unstable and
>change it.
>
>How it is solved:
>I suspend/resume timekeeping when we enter/exit kdb. Suspending/resuming
>timekeeping suspends/resumes the current clocksource and the watchdog
>clocksource. The patch also prevents potential softlockup warnings that
>appear in earlier kernels.
>
>Thanks.
>
>Signed-off-by: Konstantin Baydarov <[EMAIL PROTECTED]>
>
> kdb/kdbmain.c             |   17 +++++++++++++++++
> kernel/time/timekeeping.c |   27 +++++++++++++++++++++++++++
> 2 files changed, 44 insertions(+)
>
>Index: linux-2.6.22-rc7/kdb/kdbmain.c
>===================================================================
>--- linux-2.6.22-rc7.orig/kdb/kdbmain.c
>+++ linux-2.6.22-rc7/kdb/kdbmain.c
>@@ -47,6 +47,9 @@
> #include <asm/system.h>
> #include <asm/kdebug.h>
> 
>+int kdb_timekeeping_suspend(void);
>+int kdb_timekeeping_resume(void);
>+
> /*
>  * Kernel debugger state flags
>  */
>@@ -60,6 +63,7 @@ atomic_t kdb_8250;
>  */
> static DEFINE_SPINLOCK(kdb_lock);
> volatile int kdb_initial_cpu = -1;            /* cpu number that owns kdb */
>+volatile int kdb_initial_cpu_save = -1;               /* cpu number that owns 
>kdb */
> int kdb_seqno = 2;                            /* how many times kdb has been 
> entered */
> 
> volatile int kdb_nextline = 1;
>@@ -1998,6 +2002,11 @@ kdb(kdb_reason_t reason, int error, stru
>                       smp_kdb_stop();
>                       KDB_DEBUG_STATE("kdb 8", reason);
>               }
>+              /* Suspend clocksource, when entering kdb, to prevent
>+               * false soft lockup warnings and switching to another
>+               * clocksource.
>+               */
>+              kdb_timekeeping_suspend();
>       }
> 
>       if (KDB_STATE(GO1)) {
>@@ -2020,6 +2029,7 @@ kdb(kdb_reason_t reason, int error, stru
>       if (result == KDB_CMD_GO && KDB_STATE(SSBPT))
>               KDB_STATE_SET(GO1);
> 
>+      kdb_initial_cpu_save = kdb_initial_cpu;
>       if (smp_processor_id() == kdb_initial_cpu &&
>         !KDB_STATE(DOING_SS) &&
>         !KDB_STATE(RECURSE)) {
>@@ -2055,6 +2065,13 @@ kdb(kdb_reason_t reason, int error, stru
>               }
>       }
> 
>+      /* Only do this work if we are really leaving kdb */
>+      if (!(KDB_STATE(DOING_SS) || KDB_STATE(SSBPT) || KDB_STATE(RECURSE))) {
>+              if(smp_processor_id() == kdb_initial_cpu_save)
>+                      /* Resume clocksource when initial cpu leaves kdb */
>+                      kdb_timekeeping_resume();
>+      }
>+
>       KDB_DEBUG_STATE("kdb 14", result);
>       kdba_restoreint(&int_state);
> #ifdef  CONFIG_CPU_XSCALE
>Index: linux-2.6.22-rc7/kernel/time/timekeeping.c
>===================================================================
>--- linux-2.6.22-rc7.orig/kernel/time/timekeeping.c
>+++ linux-2.6.22-rc7/kernel/time/timekeeping.c
>@@ -299,6 +299,19 @@ static int timekeeping_resume(struct sys
>       return 0;
> }
> 
>+#if defined(CONFIG_KDB) || defined(CONFIG_KDB_MODULE)
>+int kdb_timekeeping_resume(void)
>+{
>+      int ret;
>+      struct sys_device dev;
>+
>+      ret = timekeeping_resume(&dev);
>+
>+      return ret;
>+}
>+EXPORT_SYMBOL(kdb_timekeeping_resume);
>+#endif
>+
> static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
> {
>       unsigned long flags;
>@@ -313,6 +326,20 @@ static int timekeeping_suspend(struct sy
>       return 0;
> }
> 
>+#if defined(CONFIG_KDB) || defined(CONFIG_KDB_MODULE)
>+int kdb_timekeeping_suspend(void)
>+{
>+      int ret;
>+      struct sys_device dev;
>+      pm_message_t state;
>+
>+      ret = timekeeping_suspend(&dev, state);
>+
>+      return ret;
>+}
>+EXPORT_SYMBOL(kdb_timekeeping_suspend);
>+#endif
>+
> /* sysfs resume/suspend bits for timekeeping */
> static struct sysdev_class timekeeping_sysclass = {
>       .resume         = timekeeping_resume,

Thanks for the patch and sorry for the delay in replying.  I am trying
to catch up on a backlog of kdb patches.

When kdb was written, it was the only kernel debugger and we made the
mistake of adding kdb_*() helper functions in the rest of the kernel.
Now we have choices for debuggers, and all of the debuggers have the
same problems when they stop the system.  So the change should work for
all debuggers.  Does this work for you?  The patch below is against
kdb-v4.4-2.6.23-rc1-common-1.

Index: linux/kdb/kdbmain.c
===================================================================
--- linux.orig/kdb/kdbmain.c    2007-08-03 18:00:10.558422200 +1000
+++ linux/kdb/kdbmain.c 2007-08-03 17:59:53.840581765 +1000
@@ -41,6 +41,7 @@
 #endif
 #include <linux/cpu.h>
 #include <linux/kdebug.h>
+#include <linux/time_func.h>
 
 #include <acpi/acpi_bus.h>
 
@@ -1737,6 +1738,8 @@ kdb(kdb_reason_t reason, int error, stru
        int ss_event, old_regs_saved = 0;
        struct pt_regs *old_regs = NULL;
        kdb_dbtrap_t db_result=KDB_DB_NOBPT;
+       pm_message_t pm_message_state;
+
        preempt_disable();
        atomic_inc(&kdb_event);
 
@@ -1998,6 +2001,11 @@ kdb(kdb_reason_t reason, int error, stru
                        smp_kdb_stop();
                        KDB_DEBUG_STATE("kdb 8", reason);
                }
+               /* Suspend clocksource, when entering kdb, to prevent
+                * false soft lockup warnings and switching to another
+                * clocksource.
+                */
+               timekeeping_suspend(NULL, pm_message_state);
        }
 
        if (KDB_STATE(GO1)) {
@@ -2050,6 +2058,7 @@ kdb(kdb_reason_t reason, int error, stru
                                ;
                        if (!kdb_quiet(reason))
                                notify_die(DIE_KDEBUG_LEAVE, "KDEBUG LEAVE", 
regs, error, 0, 0);
+                       timekeeping_resume(NULL);
                        kdb_initial_cpu = -1;   /* release kdb control */
                        KDB_DEBUG_STATE("kdb 13", reason);
                }
Index: linux/include/linux/time_func.h
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ linux/include/linux/time_func.h     2007-08-03 17:11:48.643556233 +1000
@@ -0,0 +1,19 @@
+#ifndef _LINUX_TIME_FUNC_H
+#define _LINUX_TIME_FUNC_H
+
+/* Define kernel functions used by time sources.  These cannot be included in
+ * time.h because their parameter definitions will pull in a long chain of
+ * other defines which make it much more complicated to build asm-offsets.c.
+ */
+
+#include <linux/pm.h>
+#include <linux/sysdev.h>
+
+/* These functions take sys_device and pm_message_t parameters but they are not
+ * used.  You can safely pass a NULL dev and any pm_message_t value.
+ */
+
+extern int timekeeping_resume(struct sys_device *dev);
+extern int timekeeping_suspend(struct sys_device *dev, pm_message_t state);
+
+#endif
Index: linux/kernel/time/timekeeping.c
===================================================================
--- linux.orig/kernel/time/timekeeping.c        2007-08-03 16:41:45.007031263 
+1000
+++ linux/kernel/time/timekeeping.c     2007-08-03 17:08:34.460707523 +1000
@@ -17,6 +17,7 @@
 #include <linux/clocksource.h>
 #include <linux/jiffies.h>
 #include <linux/time.h>
+#include <linux/time_func.h>
 #include <linux/tick.h>
 
 
@@ -277,7 +278,7 @@ static unsigned long timekeeping_suspend
  * xtime/wall_to_monotonic/jiffies/etc are
  * still managed by arch specific suspend/resume code.
  */
-static int timekeeping_resume(struct sys_device *dev)
+int timekeeping_resume(struct sys_device *dev)
 {
        unsigned long flags;
        unsigned long now = read_persistent_clock();
@@ -308,8 +309,9 @@ static int timekeeping_resume(struct sys
 
        return 0;
 }
+EXPORT_SYMBOL(timekeeping_resume);
 
-static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
+int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
 {
        unsigned long flags;
 
@@ -322,6 +324,7 @@ static int timekeeping_suspend(struct sy
 
        return 0;
 }
+EXPORT_SYMBOL(timekeeping_suspend);
 
 /* sysfs resume/suspend bits for timekeeping */
 static struct sysdev_class timekeeping_sysclass = {

---------------------------
Use http://oss.sgi.com/ecartis to modify your settings or to unsubscribe.

Reply via email to