The following was observed by David Wilder while testing kexec on panic on
a system with a Pentium 4 with HT. It looks like the problem might have
occurred because CPU0 was holding the runq lock when it received the NMI,
so it was stopped with the runq lock held. CPU1 enabled interrupts after
stopping CPU0. Somehow it then received the local APIC timer interrupt
and started spinning on the runq lock.

Appended is a possible fix for this race condition, which avoids
re-enabling irqs on panic shutdown.

Please review.

Thanks
Maneesh


-- BUG: spinlock lockup on CPU#1, bash/4396, c52781a0
[<c01c1870>] _raw_spin_lock+0xb7/0xd2
[<c029e148>] _spin_lock+0x6/0x8
[<c011b33f>] scheduler_tick+0xe7/0x328
[<c0128a7c>] update_process_times+0x51/0x5d
[<c0114592>] smp_apic_timer_interrupt+0x4f/0x58
[<c01141ff>] lapic_shutdown+0x76/0x7e
[<c0104d7c>] apic_timer_interrupt+0x1c/0x30
[<c01141ff>] lapic_shutdown+0x76/0x7e
[<c0116659>] machine_crash_shutdown+0x83/0xaa
[<c013cc36>] crash_kexec+0xc1/0xe3
[<c029e148>] _spin_lock+0x6/0x8
[<c013cc22>] crash_kexec+0xad/0xe3
[<c0215280>] __handle_sysrq+0x84/0xfd
[<c018d937>] write_sysrq_trigger+0x2c/0x35
[<c015e47b>] vfs_write+0xa2/0x13b
[<c015ea73>] sys_write+0x3b/0x64
[<c0103c69>] syscall_call+0x7/0xb



o lapic_shutdown() re-enables irqs, which is undesirable in the panic case,
  so call disable_local_APIC() directly from machine_crash_shutdown()
  and close a possible race window.

Signed-off-by: Maneesh Soni <[EMAIL PROTECTED]>
---

 linux-2.6.16-rc5-git10-maneesh/arch/i386/kernel/apic.c  |    2 +-
 linux-2.6.16-rc5-git10-maneesh/arch/i386/kernel/crash.c |    8 +++++++-
 linux-2.6.16-rc5-git10-maneesh/include/asm-i386/apic.h  |    2 ++
 3 files changed, 10 insertions(+), 2 deletions(-)

diff -puN arch/i386/kernel/crash.c~kdump-shutdown-hang-fix 
arch/i386/kernel/crash.c
--- linux-2.6.16-rc5-git10/arch/i386/kernel/crash.c~kdump-shutdown-hang-fix     
2006-03-08 10:04:59.541357032 +0530
+++ linux-2.6.16-rc5-git10-maneesh/arch/i386/kernel/crash.c     2006-03-08 
10:04:59.585350344 +0530
@@ -175,7 +175,13 @@ void machine_crash_shutdown(struct pt_re
        /* Make a note of crashing cpu. Will be used in NMI callback.*/
        crashing_cpu = smp_processor_id();
        nmi_shootdown_cpus();
-       lapic_shutdown();
+
+       if (cpu_has_apic) {
+               clear_local_APIC();
+               if (enabled_via_apicbase)
+                       disable_local_APIC();
+       }
+
 #if defined(CONFIG_X86_IO_APIC)
        disable_IO_APIC();
 #endif
diff -puN arch/i386/kernel/apic.c~kdump-shutdown-hang-fix 
arch/i386/kernel/apic.c
--- linux-2.6.16-rc5-git10/arch/i386/kernel/apic.c~kdump-shutdown-hang-fix      
2006-03-08 10:04:59.574352016 +0530
+++ linux-2.6.16-rc5-git10-maneesh/arch/i386/kernel/apic.c      2006-03-08 
10:04:59.589349736 +0530
@@ -102,7 +102,7 @@ void __init apic_intr_init(void)
 /* Using APIC to generate smp_local_timer_interrupt? */
 int using_apic_timer = 0;
 
-static int enabled_via_apicbase;
+int enabled_via_apicbase;
 
 void enable_NMI_through_LVT0 (void * dummy)
 {
diff -puN include/asm-i386/apic.h~kdump-shutdown-hang-fix 
include/asm-i386/apic.h
--- linux-2.6.16-rc5-git10/include/asm-i386/apic.h~kdump-shutdown-hang-fix      
2006-03-08 10:04:59.578351408 +0530
+++ linux-2.6.16-rc5-git10-maneesh/include/asm-i386/apic.h      2006-03-08 
10:04:59.589349736 +0530
@@ -137,6 +137,8 @@ void switch_APIC_timer_to_ipi(void *cpum
 void switch_ipi_to_APIC_timer(void *cpumask);
 #define ARCH_APICTIMER_STOPS_ON_C3     1
 
+extern int enabled_via_apicbase;
+
 #else /* !CONFIG_X86_LOCAL_APIC */
 static inline void lapic_shutdown(void) { }
 
_
-- 
Maneesh Soni
Linux Technology Center, 
IBM India Software Labs,
Bangalore, India
email: [EMAIL PROTECTED]
Phone: 91-80-51776416
_______________________________________________
fastboot mailing list
[email protected]
https://lists.osdl.org/mailman/listinfo/fastboot

Reply via email to