Problem:
Sometimes (after a remote gdb has connected) an x86 SMP kernel (with KGDB and
the NMI watchdog enabled) hangs when kernel modules are automatically loaded.

Root Cause: 
  A slave CPU hangs in kgdb_wait() when the master CPU leaves KGDB, causing the
whole system to hang.
  If a watchdog NMI occurs after a slave CPU has already exited kgdb_wait() but
before the master CPU has cleared debugger_active, the slave CPU can re-enter
kgdb_wait(). Because procindebug[atomic_read(&debugger_active) - 1] is zero
(the master CPU set procindebug[MasterCPU] to zero before exiting), the slave
loops in kgdb_wait():
...
        /* Wait till master processor goes completely into the debugger.
         */
        while (!atomic_read(&procindebug[atomic_read(&debugger_active) - 1])) {
                int i = 10;     /* an arbitrary number */

                while (--i)
                        cpu_relax();
        }
...
The slave CPU loops until the master CPU completely exits KGDB and sets
debugger_active to zero.
But when debugger_active becomes zero, the slave CPU does not leave the loop.
Instead it hangs in the while loop, because atomic_read(&debugger_active) is
now 0 and the slave starts checking procindebug[-1]:
...
while (!atomic_read(&procindebug[atomic_read(&debugger_active) - 1])){...}
...
For me procindebug[-1] is always zero, so the slave CPU hangs in the NMI
handler and stops accepting NMIs. This leads to a whole-system hang.

How Solved:
  A new atomic variable, debugger_exiting, was added. It is set when the master
CPU starts waiting for the slave CPUs, and is reset after debugger_active is
set to zero. debugger_exiting is checked in kgdb_notify(), and kgdb_nmihook()
is not called until debugger_exiting is zero again. So debugger_exiting
guarantees that a slave CPU won't re-enter kgdb_wait() until the master CPU has
completely left KGDB.
  Patch against kernel 2.6.24.3.

Signed-off-by: Konstantin Baydarov <[EMAIL PROTECTED]>

 arch/x86/kernel/kgdb_32.c |    9 ++++++---
 arch/x86/kernel/kgdb_64.c |    9 ++++++---
 include/linux/kgdb.h      |    1 +
 kernel/kgdb.c             |    4 ++++
 4 files changed, 17 insertions(+), 6 deletions(-)

Index: ko_2_6_24_3_kgdb/arch/x86/kernel/kgdb_32.c
===================================================================
--- ko_2_6_24_3_kgdb.orig/arch/x86/kernel/kgdb_32.c
+++ ko_2_6_24_3_kgdb/arch/x86/kernel/kgdb_32.c
@@ -326,14 +326,16 @@ static int kgdb_notify(struct notifier_b
 
        switch (cmd) {
        case DIE_NMI:
-               if (atomic_read(&debugger_active)) {
+               if (atomic_read(&debugger_active) &&
+                   !atomic_read(&debugger_exiting)) {
                        /* KGDB CPU roundup */
                        kgdb_nmihook(raw_smp_processor_id(), regs);
                        return NOTIFY_STOP;
                }
                return NOTIFY_DONE;
        case DIE_NMI_IPI:
-               if (atomic_read(&debugger_active)) {
+               if (atomic_read(&debugger_active) &&
+                   !atomic_read(&debugger_exiting)) {
                        /* KGDB CPU roundup */
                        if (kgdb_nmihook(raw_smp_processor_id(), regs))
                                return NOTIFY_DONE;
@@ -341,7 +343,8 @@ static int kgdb_notify(struct notifier_b
                }
                return NOTIFY_DONE;
        case DIE_NMIWATCHDOG:
-               if (atomic_read(&debugger_active)) {
+               if (atomic_read(&debugger_active) &&
+                   !atomic_read(&debugger_exiting)) {
                        /* KGDB CPU roundup */
                        kgdb_nmihook(raw_smp_processor_id(), regs);
                        return NOTIFY_STOP;
Index: ko_2_6_24_3_kgdb/arch/x86/kernel/kgdb_64.c
===================================================================
--- ko_2_6_24_3_kgdb.orig/arch/x86/kernel/kgdb_64.c
+++ ko_2_6_24_3_kgdb/arch/x86/kernel/kgdb_64.c
@@ -406,14 +406,16 @@ static int kgdb_notify(struct notifier_b
 
        switch (cmd) {
        case DIE_NMI:
-               if (atomic_read(&debugger_active)) {
+               if (atomic_read(&debugger_active) &&
+                   !atomic_read(&debugger_exiting)) {
                        /* KGDB CPU roundup */
                        kgdb_nmihook(raw_smp_processor_id(), regs);
                        return NOTIFY_STOP;
                }
                return NOTIFY_DONE;
        case DIE_NMI_IPI:
-               if (atomic_read(&debugger_active)) {
+               if (atomic_read(&debugger_active) &&
+                   !atomic_read(&debugger_exiting)) {
                        /* KGDB CPU roundup */
                        if (kgdb_nmihook(raw_smp_processor_id(), regs))
                                return NOTIFY_DONE;
@@ -421,7 +423,8 @@ static int kgdb_notify(struct notifier_b
                }
                return NOTIFY_DONE;
        case DIE_NMIWATCHDOG:
-               if (atomic_read(&debugger_active)) {
+               if (atomic_read(&debugger_active) &&
+                   !atomic_read(&debugger_exiting)) {
                        /* KGDB CPU roundup */
                        kgdb_nmihook(raw_smp_processor_id(), regs);
                        return NOTIFY_STOP;
Index: ko_2_6_24_3_kgdb/include/linux/kgdb.h
===================================================================
--- ko_2_6_24_3_kgdb.orig/include/linux/kgdb.h
+++ ko_2_6_24_3_kgdb/include/linux/kgdb.h
@@ -281,6 +281,7 @@ extern int kgdb_handle_exception(int ex_
 extern int kgdb_nmihook(int cpu, void *regs);
 extern int debugger_step;
 extern atomic_t debugger_active;
+extern atomic_t debugger_exiting;
 #else
 /* Stubs for when KGDB is not set. */
 static const atomic_t debugger_active = ATOMIC_INIT(0);
Index: ko_2_6_24_3_kgdb/kernel/kgdb.c
===================================================================
--- ko_2_6_24_3_kgdb.orig/kernel/kgdb.c
+++ ko_2_6_24_3_kgdb/kernel/kgdb.c
@@ -117,6 +117,8 @@ int debugger_step;
 static atomic_t kgdb_sync = ATOMIC_INIT(-1);
 atomic_t debugger_active;
 EXPORT_SYMBOL(debugger_active);
+atomic_t debugger_exiting = ATOMIC_INIT(0);
+EXPORT_SYMBOL(debugger_exiting);
 
 /* Our I/O buffers. */
 static char remcom_in_buffer[BUFMAX];
@@ -1526,6 +1528,7 @@ default_handle:
        atomic_set(&procindebug[processor], 0);
 
        if (!debugger_step || !kgdb_contthread) {
+               atomic_set(&debugger_exiting, 1);
                for (i = 0; i < NR_CPUS; i++)
                        spin_unlock(&slavecpulocks[i]);
                /* Wait till all the processors have quit
@@ -1557,6 +1560,7 @@ default_handle:
  kgdb_restore:
        /* Free debugger_active */
        atomic_set(&debugger_active, 0);
+       atomic_set(&debugger_exiting, 0);
        atomic_set(&kgdb_sync, -1);
        clocksource_touch_watchdog();
        kgdb_softlock_skip[processor] = 1;

-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
Kgdb-bugreport mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/kgdb-bugreport

Reply via email to