Re: [PATCH v4 11/16] powerpc/64s: machine check interrupt update NMI accounting
Nicholas Piggin writes: > Excerpts from kbuild test robot's message of May 9, 2020 1:13 pm: >> Hi Nicholas, >> >> I love your patch! Yet something to improve: > > ... > >> 1419 #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) >> 1420 pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, >> regs->dsisr); >> 1421 #else >> 1422 pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, >> regs->dsisr); >> 1423 #endif >> 1424 #ifdef CONFIG_PPC64 >>> 1425pr_cont("IRQMASK: %lx IN_NMI:%d IN_MCE:%d", >>> regs->softe, (int)get_paca()->in_nmi, (int)get_paca()->in_mce); > > Oh I meant to get rid of that hunk, it crept back in :( > > mpe if you could please take it out if you're merging this. Yep. I just came here to tell you I'd dropped that hunk :) > It was quite useful for debugging this stuff, I might do a proper patch > for this, but for now not necessary (it doesn't matter for "normal" > crashes only crash crashes). Yeah would be good to print more of those flags. cheers
Re: [PATCH v4 11/16] powerpc/64s: machine check interrupt update NMI accounting
Excerpts from kbuild test robot's message of May 9, 2020 1:13 pm: > Hi Nicholas, > > I love your patch! Yet something to improve: ... > 1419#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) > 1420pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, > regs->dsisr); > 1421#else > 1422pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, > regs->dsisr); > 1423#endif > 1424#ifdef CONFIG_PPC64 >> 1425 pr_cont("IRQMASK: %lx IN_NMI:%d IN_MCE:%d", regs->softe, >> (int)get_paca()->in_nmi, (int)get_paca()->in_mce); Oh I meant to get rid of that hunk, it crept back in :( mpe if you could please take it out if you're merging this. It was quite useful for debugging this stuff, I might do a proper patch for this, but for now not necessary (it doesn't matter for "normal" crashes only crash crashes). Thanks, Nick
Re: [PATCH v4 11/16] powerpc/64s: machine check interrupt update NMI accounting
Hi Nicholas, I love your patch! Yet something to improve: [auto build test ERROR on powerpc/next] [also build test ERROR on tip/perf/core v5.7-rc4 next-20200508] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system. BTW, we also suggest to use '--base' option to specify the base tree in git format-patch, please see https://stackoverflow.com/a/37406982] url: https://github.com/0day-ci/linux/commits/Nicholas-Piggin/powerpc-machine-check-and-system-reset-fixes/20200509-030554 base: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next config: powerpc-randconfig-r002-20200509 (attached as .config) compiler: powerpc64-linux-gcc (GCC) 9.3.0 reproduce: wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # save the attached .config to linux build tree COMPILER_INSTALL_PATH=$HOME/0day GCC_VERSION=9.3.0 make.cross ARCH=powerpc If you fix the issue, kindly add following tag as appropriate Reported-by: kbuild test robot All errors (new ones prefixed by >>): In file included from include/linux/kernel.h:15, from include/asm-generic/bug.h:19, from arch/powerpc/include/asm/bug.h:109, from include/linux/bug.h:5, from arch/powerpc/include/asm/mmu.h:130, from arch/powerpc/include/asm/paca.h:18, from arch/powerpc/include/asm/current.h:13, from include/linux/sched.h:12, from arch/powerpc/kernel/process.c:14: arch/powerpc/kernel/process.c: In function 'show_regs': >> arch/powerpc/kernel/process.c:1425:74: error: 'struct paca_struct' has no >> member named 'in_nmi' 1425 | pr_cont("IRQMASK: %lx IN_NMI:%d IN_MCE:%d", regs->softe, (int)get_paca()->in_nmi, (int)get_paca()->in_mce); | ^~ include/linux/printk.h:312:26: note: in definition of macro 'pr_cont' 312 | printk(KERN_CONT fmt, ##__VA_ARGS__) | ^~~ >> arch/powerpc/kernel/process.c:1425:99: error: 'struct paca_struct' has no >> member named 'in_mce' 1425 | pr_cont("IRQMASK: %lx IN_NMI:%d IN_MCE:%d", regs->softe, (int)get_paca()->in_nmi, (int)get_paca()->in_mce); | ^~ include/linux/printk.h:312:26: note: in definition of macro 'pr_cont' 312 | printk(KERN_CONT fmt, ##__VA_ARGS__) | ^~~ vim +1425 arch/powerpc/kernel/process.c 1401 1402 void show_regs(struct pt_regs * regs) 1403 { 1404 int i, trap; 1405 1406 show_regs_print_info(KERN_DEFAULT); 1407 1408 printk("NIP: "REG" LR: "REG" CTR: "REG"\n", 1409 regs->nip, regs->link, regs->ctr); 1410 printk("REGS: %px TRAP: %04lx %s (%s)\n", 1411 regs, regs->trap, print_tainted(), init_utsname()->release); 1412 printk("MSR: "REG" ", regs->msr); 1413 print_msr_bits(regs->msr); 1414 pr_cont(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); 1415 trap = TRAP(regs); 1416 if ((TRAP(regs) != 0xc00) && cpu_has_feature(CPU_FTR_CFAR)) 1417 pr_cont("CFAR: "REG" ", regs->orig_gpr3); 1418 if (trap == 0x200 || trap == 0x300 || trap == 0x600) 1419 #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) 1420 pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr); 1421 #else 1422 pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); 1423 #endif 1424 #ifdef CONFIG_PPC64 > 1425 pr_cont("IRQMASK: %lx IN_NMI:%d IN_MCE:%d", regs->softe, > (int)get_paca()->in_nmi, (int)get_paca()->in_mce); 1426 #endif 1427 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 1428 if (MSR_TM_ACTIVE(regs->msr)) 1429 pr_cont("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch); 1430 #endif 1431 1432 for (i = 0; i < 32; i++) { 1433 if ((i % REGS_PER_LINE) == 0) 1434 pr_cont("\nGPR%02d: ", i); 1435 pr_cont(REG " ", regs->gpr[i]); 1436 if (i == LAST_VOLATILE && !FULL_REGS(regs)) 1437 break; 1438 } 1439 pr_cont("\n"); 1440 #ifdef CONFIG_KALLSYMS 1441 /* 1442 * Lookup NIP late so we have the best change of getting the 1443 * above info out without failing 1444 */ 1445 printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip); 1446 printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link); 1447 #endif 1448 show_stack(current, (unsigned
[PATCH v4 11/16] powerpc/64s: machine check interrupt update NMI accounting
machine_check_early is taken as an NMI, so nmi_enter is used there. machine_check_exception is no longer taken as an NMI (it's invoked via irq_work in the case a machine check hits in kernel mode), so remove the nmi_enter from that case. In NMI context, hash faults don't try to refill the hash table, which can lead to crashes accessing non-pinned kernel pages. System reset still has this potential problem. Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/mce.c | 7 +++ arch/powerpc/kernel/process.c | 2 +- arch/powerpc/kernel/traps.c | 14 +- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 8077b5fb18a7..be7e3f92a7b5 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -574,6 +574,9 @@ EXPORT_SYMBOL_GPL(machine_check_print_event_info); long machine_check_early(struct pt_regs *regs) { long handled = 0; + bool nested = in_nmi(); + if (!nested) + nmi_enter(); hv_nmi_check_nonrecoverable(regs); @@ -582,6 +585,10 @@ long machine_check_early(struct pt_regs *regs) */ if (ppc_md.machine_check_early) handled = ppc_md.machine_check_early(regs); + + if (!nested) + nmi_exit(); + return handled; } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 9c21288f8645..44410dd3029f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1421,7 +1421,7 @@ void show_regs(struct pt_regs * regs) pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); #endif #ifdef CONFIG_PPC64 - pr_cont("IRQMASK: %lx ", regs->softe); + pr_cont("IRQMASK: %lx IN_NMI:%d IN_MCE:%d", regs->softe, (int)get_paca()->in_nmi, (int)get_paca()->in_mce); #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (MSR_TM_ACTIVE(regs->msr)) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 3fca22276bb1..9f6852322e59 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -823,7 +823,19 @@ int machine_check_generic(struct pt_regs *regs) void machine_check_exception(struct pt_regs *regs) { int recover = 0; - bool nested = in_nmi(); + bool nested; + + /* +* BOOK3S_64 does not call this handler as a non-maskable interrupt +* (it uses its own early real-mode handler to handle the MCE proper +* and then raises irq_work to call this handler when interrupts are +* enabled). Set nested = true for this case, which just makes it avoid +* the nmi_enter/exit. +*/ + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) || in_nmi()) + nested = true; + else + nested = false; if (!nested) nmi_enter(); -- 2.23.0