On 2012年05月08日 11:46, Benjamin Herrenschmidt wrote: > Hi Wang! > > Does this patch fix it for you? >
Sorry, this patch doesn't work. And my system crashed again with the patch. ====================================================== # kernel BUG at /usr/src/kernels/linux/arch/powerpc/kernel/irq.c:188! cpu 0x0: Vector: 700 (Program Check) at [c00000026ffebbb0] pc: c00000000000ea9c: .__check_irq_replay+0x7c/0x90 lr: c000000000010578: .arch_local_irq_restore+0x38/0x90 sp: c00000026ffebe30 msr: 8000000000029032 current = 0xc000000000e27be0 paca = 0xc000000003580000 softe: 0 irq_happened: 0x01 pid = 0, comm = swapper/0 kernel BUG at /usr/src/kernels/linux/arch/powerpc/kernel/irq.c:188! enter ? for help [link register ] c000000000010578 .arch_local_irq_restore+0x38/0x90 [c00000026ffebe30] c000000000f42100 softirq_vec+0x0/0x80 (unreliable) [c00000026ffebea0] c000000000085854 .__do_softirq+0xa4/0x2a0 [c00000026ffebf90] c0000000000229b8 .call_do_softirq+0x14/0x24 [c000000000edf870] c0000000000106c8 .do_softirq+0xf8/0x130 [c000000000edf910] c000000000085544 .irq_exit+0xc4/0xf0 [c000000000edf990] c0000000000100a4 .do_IRQ+0xe4/0x310 [c000000000edfa50] c0000000000038c0 hardware_interrupt_common+0x140/0x180 --- Exception: 501 (Hardware Interrupt) at c0000000000105b4 .arch_local_irq_restore+0x74/0x90 [c000000000edfd40] c000000000058480 .pSeries_idle+0x10/0x40 (unreliable) [c000000000edfdb0] c000000000017d70 .cpu_idle+0x190/0x290 [c000000000edfe70] c00000000000b308 .rest_init+0x88/0xa0 [c000000000edfef0] c0000000008c0d1c .start_kernel+0x554/0x574 [c000000000edff90] c000000000009658 .start_here_common+0x20/0x48 0:mon> e cpu 0x0: Vector: 700 (Program Check) at [c00000026ffebbb0] pc: c00000000000ea9c: .__check_irq_replay+0x7c/0x90 lr: c000000000010578: .arch_local_irq_restore+0x38/0x90 sp: c00000026ffebe30 msr: 8000000000029032 current = 0xc000000000e27be0 paca = 0xc000000003580000 softe: 0 irq_happened: 0x01 pid = 0, comm = swapper/0 kernel BUG at /usr/src/kernels/linux/arch/powerpc/kernel/irq.c:188! 
0:mon> r R00 = 0000000000000001 R16 = 0000000003680000 R01 = c00000026ffebe30 R17 = 000000000021ed0f R02 = c000000000edd228 R18 = 000000000021efbb R03 = 0000000000000500 R19 = 000000000021ee84 R04 = 0000000000000000 R20 = c000000000f42100 R05 = 00000000000007ea R21 = 0000000000000000 R06 = 00000000273f6d30 R22 = c000000000955b80 R07 = 00363d0e68097e11 R23 = c000000000955b80 R08 = 00000000008c0000 R24 = 000000000000000a R09 = c000000003580000 R25 = 0000000000000000 R10 = 0000000000000001 R26 = c000000000edc100 R11 = 0000000000000000 R27 = c00000026ffe8000 R12 = 0000000000000002 R28 = 0000000000000000 R13 = c000000003580000 R29 = c000000000f42100 R14 = 0000000002e1fa78 R30 = c000000000e60890 R15 = 0000000001173000 R31 = 0000000000000040 pc = c00000000000ea9c .__check_irq_replay+0x7c/0x90 cfar= c00000000000ea3c .__check_irq_replay+0x1c/0x90 lr = c000000000010578 .arch_local_irq_restore+0x38/0x90 msr = 8000000000029032 cr = 28000048 ctr = c000000000063f70 xer = 0000000000000001 trap = 700 0:mon> t [link register ] c000000000010578 .arch_local_irq_restore+0x38/0x90 [c00000026ffebe30] c000000000f42100 softirq_vec+0x0/0x80 (unreliable) [c00000026ffebea0] c000000000085854 .__do_softirq+0xa4/0x2a0 [c00000026ffebf90] c0000000000229b8 .call_do_softirq+0x14/0x24 [c000000000edf870] c0000000000106c8 .do_softirq+0xf8/0x130 [c000000000edf910] c000000000085544 .irq_exit+0xc4/0xf0 [c000000000edf990] c0000000000100a4 .do_IRQ+0xe4/0x310 [c000000000edfa50] c0000000000038c0 hardware_interrupt_common+0x140/0x180 --- Exception: 501 (Hardware Interrupt) at c0000000000105b4 .arch_local_irq_restore+0x74/0x90 [c000000000edfd40] c000000000058480 .pSeries_idle+0x10/0x40 (unreliable) [c000000000edfdb0] c000000000017d70 .cpu_idle+0x190/0x290 [c000000000edfe70] c00000000000b308 .rest_init+0x88/0xa0 [c000000000edfef0] c0000000008c0d1c .start_kernel+0x554/0x574 [c000000000edff90] c000000000009658 .start_here_common+0x20/0x48 0:mon> di > From 249f8649bf95a4c3e6637284754a165c1d83c394 Mon Sep 17 
00:00:00 2001 > From: Benjamin Herrenschmidt <b...@kernel.crashing.org> > Date: Tue, 8 May 2012 13:31:59 +1000 > Subject: [PATCH 2/3] powerpc/irq: Fix bug with new lazy IRQ handling code > > We had a case where we could turn on hard interrupts while > leaving the PACA_IRQ_HARD_DIS bit set in the PACA. This can > in turn cause a BUG_ON() to hit in __check_irq_replay() due > to interrupt state getting out of sync. > > The assembly code was also way too convoluted. Instead, we > now leave it to the C code to do the right thing which ends > up being smaller and more readable. > > Signed-off-by: Benjamin Herrenschmidt <b...@kernel.crashing.org> > --- > arch/powerpc/kernel/entry_64.S | 18 ------------------ > arch/powerpc/kernel/irq.c | 8 +++++++- > 2 files changed, 7 insertions(+), 19 deletions(-) > > diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S > index fd46046..29f1357 100644 > --- a/arch/powerpc/kernel/entry_64.S > +++ b/arch/powerpc/kernel/entry_64.S > @@ -763,16 +763,6 @@ do_work: > SOFT_DISABLE_INTS(r3,r4) > 1: bl .preempt_schedule_irq > > - /* Hard-disable interrupts again (and update PACA) */ > -#ifdef CONFIG_PPC_BOOK3E > - wrteei 0 > -#else > - ld r10,PACAKMSR(r13) /* Get kernel MSR without EE */ > - mtmsrd r10,1 > -#endif /* CONFIG_PPC_BOOK3E */ > - li r0,PACA_IRQ_HARD_DIS > - stb r0,PACAIRQHAPPENED(r13) > - > /* Re-test flags and eventually loop */ > clrrdi r9,r1,THREAD_SHIFT > ld r4,TI_FLAGS(r9) > @@ -783,14 +773,6 @@ do_work: > user_work: > #endif /* CONFIG_PREEMPT */ > > - /* Enable interrupts */ > -#ifdef CONFIG_PPC_BOOK3E > - wrteei 1 > -#else > - ori r10,r10,MSR_EE > - mtmsrd r10,1 > -#endif /* CONFIG_PPC_BOOK3E */ > - > andi. 
r0,r4,_TIF_NEED_RESCHED > beq 1f > bl .restore_interrupts > diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c > index 5ec1b23..3717fb5 100644 > --- a/arch/powerpc/kernel/irq.c > +++ b/arch/powerpc/kernel/irq.c > @@ -260,11 +260,17 @@ EXPORT_SYMBOL(arch_local_irq_restore); > * if they are currently disabled. This is typically called before > * schedule() or do_signal() when returning to userspace. We do it > * in C to avoid the burden of dealing with lockdep etc... > + * > + * NOTE: This is called with interrupts hard disabled but not marked > + * as such in paca->irq_happened, so we need to resync this. > */ > void restore_interrupts(void) > { > - if (irqs_disabled()) > + if (irqs_disabled()) { > + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; > local_irq_enable(); > + } else > + __hard_irq_enable(); > } > > #endif /* CONFIG_PPC64 */ _______________________________________________ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev