Re: [PATCH v4 11/16] powerpc/64s: machine check interrupt update NMI accounting

2020-05-11 Thread Michael Ellerman
Nicholas Piggin  writes:
> Excerpts from kbuild test robot's message of May 9, 2020 1:13 pm:
>> Hi Nicholas,
>> 
>> I love your patch! Yet something to improve:
>
> ...
>
>>   1419   #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
>>   1420   pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, 
>> regs->dsisr);
>>   1421   #else
>>   1422   pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, 
>> regs->dsisr);
>>   1423   #endif
>>   1424   #ifdef CONFIG_PPC64
>>> 1425pr_cont("IRQMASK: %lx IN_NMI:%d IN_MCE:%d", 
>>> regs->softe, (int)get_paca()->in_nmi, (int)get_paca()->in_mce);
>
> Oh I meant to get rid of that hunk, it crept back in :(
>
> mpe if you could please take it out if you're merging this.

Yep. I just came here to tell you I'd dropped that hunk :)

> It was quite useful for debugging this stuff, I might do a proper patch 
> for this, but for now not necessary (it doesn't matter for "normal" 
> crashes only crash crashes).

Yeah would be good to print more of those flags.

cheers


Re: [PATCH v4 11/16] powerpc/64s: machine check interrupt update NMI accounting

2020-05-09 Thread Nicholas Piggin
Excerpts from kbuild test robot's message of May 9, 2020 1:13 pm:
> Hi Nicholas,
> 
> I love your patch! Yet something to improve:

...

>   1419#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
>   1420pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, 
> regs->dsisr);
>   1421#else
>   1422pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, 
> regs->dsisr);
>   1423#endif
>   1424#ifdef CONFIG_PPC64
>> 1425 pr_cont("IRQMASK: %lx IN_NMI:%d IN_MCE:%d", regs->softe, 
>> (int)get_paca()->in_nmi, (int)get_paca()->in_mce);

Oh I meant to get rid of that hunk, it crept back in :(

mpe if you could please take it out if you're merging this.

It was quite useful for debugging this stuff, I might do a proper patch 
for this, but for now not necessary (it doesn't matter for "normal" 
crashes only crash crashes).

Thanks,
Nick


Re: [PATCH v4 11/16] powerpc/64s: machine check interrupt update NMI accounting

2020-05-08 Thread kbuild test robot
Hi Nicholas,

I love your patch! Yet something to improve:

[auto build test ERROR on powerpc/next]
[also build test ERROR on tip/perf/core v5.7-rc4 next-20200508]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:
https://github.com/0day-ci/linux/commits/Nicholas-Piggin/powerpc-machine-check-and-system-reset-fixes/20200509-030554
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-randconfig-r002-20200509 (attached as .config)
compiler: powerpc64-linux-gcc (GCC) 9.3.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day GCC_VERSION=9.3.0 make.cross 
ARCH=powerpc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kbuild test robot 

All errors (new ones prefixed by >>):

   In file included from include/linux/kernel.h:15,
from include/asm-generic/bug.h:19,
from arch/powerpc/include/asm/bug.h:109,
from include/linux/bug.h:5,
from arch/powerpc/include/asm/mmu.h:130,
from arch/powerpc/include/asm/paca.h:18,
from arch/powerpc/include/asm/current.h:13,
from include/linux/sched.h:12,
from arch/powerpc/kernel/process.c:14:
   arch/powerpc/kernel/process.c: In function 'show_regs':
>> arch/powerpc/kernel/process.c:1425:74: error: 'struct paca_struct' has no 
>> member named 'in_nmi'
1425 |  pr_cont("IRQMASK: %lx IN_NMI:%d IN_MCE:%d", regs->softe, 
(int)get_paca()->in_nmi, (int)get_paca()->in_mce);
 |  
^~
   include/linux/printk.h:312:26: note: in definition of macro 'pr_cont'
 312 |  printk(KERN_CONT fmt, ##__VA_ARGS__)
 |  ^~~
>> arch/powerpc/kernel/process.c:1425:99: error: 'struct paca_struct' has no 
>> member named 'in_mce'
1425 |  pr_cont("IRQMASK: %lx IN_NMI:%d IN_MCE:%d", regs->softe, 
(int)get_paca()->in_nmi, (int)get_paca()->in_mce);
 |  
 ^~
   include/linux/printk.h:312:26: note: in definition of macro 'pr_cont'
 312 |  printk(KERN_CONT fmt, ##__VA_ARGS__)
 |  ^~~

vim +1425 arch/powerpc/kernel/process.c

  1401  
  1402  void show_regs(struct pt_regs * regs)
  1403  {
  1404  int i, trap;
  1405  
  1406  show_regs_print_info(KERN_DEFAULT);
  1407  
  1408  printk("NIP:  "REG" LR: "REG" CTR: "REG"\n",
  1409 regs->nip, regs->link, regs->ctr);
  1410  printk("REGS: %px TRAP: %04lx   %s  (%s)\n",
  1411 regs, regs->trap, print_tainted(), 
init_utsname()->release);
  1412  printk("MSR:  "REG" ", regs->msr);
  1413  print_msr_bits(regs->msr);
  1414  pr_cont("  CR: %08lx  XER: %08lx\n", regs->ccr, regs->xer);
  1415  trap = TRAP(regs);
  1416  if ((TRAP(regs) != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
  1417  pr_cont("CFAR: "REG" ", regs->orig_gpr3);
  1418  if (trap == 0x200 || trap == 0x300 || trap == 0x600)
  1419  #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
  1420  pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, 
regs->dsisr);
  1421  #else
  1422  pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, 
regs->dsisr);
  1423  #endif
  1424  #ifdef CONFIG_PPC64
> 1425  pr_cont("IRQMASK: %lx IN_NMI:%d IN_MCE:%d", regs->softe, 
> (int)get_paca()->in_nmi, (int)get_paca()->in_mce);
  1426  #endif
  1427  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
  1428  if (MSR_TM_ACTIVE(regs->msr))
  1429  pr_cont("\nPACATMSCRATCH: %016llx ", 
get_paca()->tm_scratch);
  1430  #endif
  1431  
  1432  for (i = 0;  i < 32;  i++) {
  1433  if ((i % REGS_PER_LINE) == 0)
  1434  pr_cont("\nGPR%02d: ", i);
  1435  pr_cont(REG " ", regs->gpr[i]);
  1436  if (i == LAST_VOLATILE && !FULL_REGS(regs))
  1437  break;
  1438  }
  1439  pr_cont("\n");
  1440  #ifdef CONFIG_KALLSYMS
  1441  /*
  1442   * Lookup NIP late so we have the best change of getting the
  1443   * above info out without failing
  1444   */
  1445  printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip);
  1446  printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link);
  1447  #endif
  1448  show_stack(current, (unsigned 

[PATCH v4 11/16] powerpc/64s: machine check interrupt update NMI accounting

2020-05-07 Thread Nicholas Piggin
machine_check_early is taken as an NMI, so nmi_enter is used there.
machine_check_exception is no longer taken as an NMI (it's invoked
via irq_work in the case a machine check hits in kernel mode), so
remove the nmi_enter from that case.

In NMI context, hash faults don't try to refill the hash table, which
can lead to crashes accessing non-pinned kernel pages. System reset
still has this potential problem.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/mce.c |  7 +++
 arch/powerpc/kernel/process.c |  2 +-
 arch/powerpc/kernel/traps.c   | 14 +-
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 8077b5fb18a7..be7e3f92a7b5 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -574,6 +574,9 @@ EXPORT_SYMBOL_GPL(machine_check_print_event_info);
 long machine_check_early(struct pt_regs *regs)
 {
long handled = 0;
+   bool nested = in_nmi();
+   if (!nested)
+   nmi_enter();
 
hv_nmi_check_nonrecoverable(regs);
 
@@ -582,6 +585,10 @@ long machine_check_early(struct pt_regs *regs)
 */
if (ppc_md.machine_check_early)
handled = ppc_md.machine_check_early(regs);
+
+   if (!nested)
+   nmi_exit();
+
return handled;
 }
 
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 9c21288f8645..44410dd3029f 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1421,7 +1421,7 @@ void show_regs(struct pt_regs * regs)
pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr);
 #endif
 #ifdef CONFIG_PPC64
-   pr_cont("IRQMASK: %lx ", regs->softe);
+   pr_cont("IRQMASK: %lx IN_NMI:%d IN_MCE:%d", regs->softe, 
(int)get_paca()->in_nmi, (int)get_paca()->in_mce);
 #endif
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (MSR_TM_ACTIVE(regs->msr))
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 3fca22276bb1..9f6852322e59 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -823,7 +823,19 @@ int machine_check_generic(struct pt_regs *regs)
 void machine_check_exception(struct pt_regs *regs)
 {
int recover = 0;
-   bool nested = in_nmi();
+   bool nested;
+
+   /*
+* BOOK3S_64 does not call this handler as a non-maskable interrupt
+* (it uses its own early real-mode handler to handle the MCE proper
+* and then raises irq_work to call this handler when interrupts are
+* enabled). Set nested = true for this case, which just makes it avoid
+* the nmi_enter/exit.
+*/
+   if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) || in_nmi())
+   nested = true;
+   else
+   nested = false;
if (!nested)
nmi_enter();
 
-- 
2.23.0