Re: [rfc 2/3] powerpc/mce: Extract physical_address for UE errors

2017-09-07 Thread Mahesh Jagannath Salgaonkar
On 09/05/2017 09:45 AM, Balbir Singh wrote:
> Walk the page table for NIP and extract the instruction. Then
> use the instruction to find the effective address via analyse_instr().
> 
> We might have page table walking races, but we expect them to
> be rare, the physical address extraction is best effort. The idea
> is to then hook up this infrastructure to memory failure eventually.
> 
> Signed-off-by: Balbir Singh 
> ---
>  arch/powerpc/include/asm/mce.h  |  2 +-
>  arch/powerpc/kernel/mce.c   |  6 -
>  arch/powerpc/kernel/mce_power.c | 60 
> +
>  3 files changed, 61 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
> index 75292c7..3a1226e 100644
> --- a/arch/powerpc/include/asm/mce.h
> +++ b/arch/powerpc/include/asm/mce.h
> @@ -204,7 +204,7 @@ struct mce_error_info {
> 
>  extern void save_mce_event(struct pt_regs *regs, long handled,
>  struct mce_error_info *mce_err, uint64_t nip,
> -uint64_t addr);
> +uint64_t addr, uint64_t phys_addr);
>  extern int get_mce_event(struct machine_check_event *mce, bool release);
>  extern void release_mce_event(void);
>  extern void machine_check_queue_event(void);
> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
> index e254399..f41a75d 100644
> --- a/arch/powerpc/kernel/mce.c
> +++ b/arch/powerpc/kernel/mce.c
> @@ -82,7 +82,7 @@ static void mce_set_error_info(struct machine_check_event 
> *mce,
>   */
>  void save_mce_event(struct pt_regs *regs, long handled,
>   struct mce_error_info *mce_err,
> - uint64_t nip, uint64_t addr)
> + uint64_t nip, uint64_t addr, uint64_t phys_addr)
>  {
>   int index = __this_cpu_inc_return(mce_nest_count) - 1;
>   struct machine_check_event *mce = this_cpu_ptr(_event[index]);
> @@ -140,6 +140,10 @@ void save_mce_event(struct pt_regs *regs, long handled,
>   } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
>   mce->u.ue_error.effective_address_provided = true;
>   mce->u.ue_error.effective_address = addr;
> + if (phys_addr != ULONG_MAX) {
> + mce->u.ue_error.physical_address_provided = true;
> + mce->u.ue_error.physical_address = phys_addr;
> + }
>   }
>   return;
>  }
> diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
> index b76ca19..b77a698 100644
> --- a/arch/powerpc/kernel/mce_power.c
> +++ b/arch/powerpc/kernel/mce_power.c
> @@ -27,6 +27,25 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include 
> +#include 
> +
> +static unsigned long addr_to_pfn(struct mm_struct *mm, unsigned long addr)
> +{
> + pte_t *ptep;
> + unsigned long flags;
> +
> + local_irq_save(flags);
> + if (mm == current->mm)
> + ptep = find_current_mm_pte(mm->pgd, addr, NULL, NULL);
> + else
> + ptep = find_init_mm_pte(addr, NULL);
> + local_irq_restore(flags);
> + if (!ptep)
> + return ULONG_MAX;
> + return pte_pfn(*ptep);
> +}
> 
>  static void flush_tlb_206(unsigned int num_sets, unsigned int action)
>  {
> @@ -489,7 +508,8 @@ static int mce_handle_ierror(struct pt_regs *regs,
> 
>  static int mce_handle_derror(struct pt_regs *regs,
>   const struct mce_derror_table table[],
> - struct mce_error_info *mce_err, uint64_t *addr)
> + struct mce_error_info *mce_err, uint64_t *addr,
> + uint64_t *phys_addr)
>  {
>   uint64_t dsisr = regs->dsisr;
>   int handled = 0;
> @@ -555,7 +575,37 @@ static int mce_handle_derror(struct pt_regs *regs,
>   mce_err->initiator = table[i].initiator;
>   if (table[i].dar_valid)
>   *addr = regs->dar;
> -
> + else if (mce_err->severity == MCE_SEV_ERROR_SYNC &&
> + table[i].error_type == MCE_ERROR_TYPE_UE) {
> + /*
> +  * Carefully look at the NIP to determine
> +  * the instruction to analyse. Reading the NIP
> +  * in real-mode is tricky and can lead to recursive
> +  * faults
> +  */
> + int instr;
> + struct mm_struct *mm;
> + unsigned long nip = regs->nip;
> + unsigned long pfn = 0, instr_addr;
> + struct instruction_op op;
> + struct pt_regs tmp = *regs;
> +
> + if (user_mode(regs))
> + mm = current->mm;
> + else
> + mm = _mm;
> +
> + pfn = addr_to_pfn(mm, nip);
> + if (pfn != ULONG_MAX) {
> + instr_addr = (pfn 

Re: [rfc 2/3] powerpc/mce: Extract physical_address for UE errors

2017-09-06 Thread Balbir Singh
On Thu, Sep 7, 2017 at 8:56 AM, Benjamin Herrenschmidt  wrote:
> On Tue, 2017-09-05 at 14:15 +1000, Balbir Singh wrote:
>>  void save_mce_event(struct pt_regs *regs, long handled,
>> struct mce_error_info *mce_err,
>> -   uint64_t nip, uint64_t addr)
>> +   uint64_t nip, uint64_t addr, uint64_t phys_addr)
>>  {
>> int index = __this_cpu_inc_return(mce_nest_count) - 1;
>> struct machine_check_event *mce = this_cpu_ptr(_event[index]);
>> @@ -140,6 +140,10 @@ void save_mce_event(struct pt_regs *regs, long handled,
>> } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
>> mce->u.ue_error.effective_address_provided = true;
>> mce->u.ue_error.effective_address = addr;
>> +   if (phys_addr != ULONG_MAX) {
>> +   mce->u.ue_error.physical_address_provided = true;
>> +   mce->u.ue_error.physical_address = phys_addr;
>> +   }
>> }
>> return;
>
> Where is "addr" coming from ? Keep in mind that on P9 at least, a UE
> will *not* give you an EA in DAR in most cases.

The EA is derived in mce_handle_derror() via page table walk and
analyse_instr(), its the best
we can do FWIK

Balbir Singh.


Re: [rfc 2/3] powerpc/mce: Extract physical_address for UE errors

2017-09-06 Thread Benjamin Herrenschmidt
On Tue, 2017-09-05 at 14:15 +1000, Balbir Singh wrote:
>  void save_mce_event(struct pt_regs *regs, long handled,
> struct mce_error_info *mce_err,
> -   uint64_t nip, uint64_t addr)
> +   uint64_t nip, uint64_t addr, uint64_t phys_addr)
>  {
> int index = __this_cpu_inc_return(mce_nest_count) - 1;
> struct machine_check_event *mce = this_cpu_ptr(_event[index]);
> @@ -140,6 +140,10 @@ void save_mce_event(struct pt_regs *regs, long handled,
> } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
> mce->u.ue_error.effective_address_provided = true;
> mce->u.ue_error.effective_address = addr;
> +   if (phys_addr != ULONG_MAX) {
> +   mce->u.ue_error.physical_address_provided = true;
> +   mce->u.ue_error.physical_address = phys_addr;
> +   }
> }
> return;

Where is "addr" coming from ? Keep in mind that on P9 at least, a UE
will *not* give you an EA in DAR in most cases.

Ben.



Re: [rfc 2/3] powerpc/mce: Extract physical_address for UE errors

2017-09-05 Thread Balbir Singh
On Wed, Sep 6, 2017 at 10:36 AM, Nicholas Piggin  wrote:
> On Tue,  5 Sep 2017 14:15:54 +1000
> Balbir Singh  wrote:
>
>> Walk the page table for NIP and extract the instruction. Then
>> use the instruction to find the effective address via analyse_instr().
>>
>> We might have page table walking races, but we expect them to
>> be rare, the physical address extraction is best effort. The idea
>> is to then hook up this infrastructure to memory failure eventually.
>
> Cool. Too bad hardware doesn't give us the RA.
>
>>
>> Signed-off-by: Balbir Singh 
>> ---
>>  arch/powerpc/include/asm/mce.h  |  2 +-
>>  arch/powerpc/kernel/mce.c   |  6 -
>>  arch/powerpc/kernel/mce_power.c | 60 
>> +
>>  3 files changed, 61 insertions(+), 7 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
>> index 75292c7..3a1226e 100644
>> --- a/arch/powerpc/include/asm/mce.h
>> +++ b/arch/powerpc/include/asm/mce.h
>> @@ -204,7 +204,7 @@ struct mce_error_info {
>>
>>  extern void save_mce_event(struct pt_regs *regs, long handled,
>>  struct mce_error_info *mce_err, uint64_t nip,
>> -uint64_t addr);
>> +uint64_t addr, uint64_t phys_addr);
>>  extern int get_mce_event(struct machine_check_event *mce, bool release);
>>  extern void release_mce_event(void);
>>  extern void machine_check_queue_event(void);
>> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
>> index e254399..f41a75d 100644
>> --- a/arch/powerpc/kernel/mce.c
>> +++ b/arch/powerpc/kernel/mce.c
>> @@ -82,7 +82,7 @@ static void mce_set_error_info(struct machine_check_event 
>> *mce,
>>   */
>>  void save_mce_event(struct pt_regs *regs, long handled,
>>   struct mce_error_info *mce_err,
>> - uint64_t nip, uint64_t addr)
>> + uint64_t nip, uint64_t addr, uint64_t phys_addr)
>>  {
>>   int index = __this_cpu_inc_return(mce_nest_count) - 1;
>>   struct machine_check_event *mce = this_cpu_ptr(_event[index]);
>> @@ -140,6 +140,10 @@ void save_mce_event(struct pt_regs *regs, long handled,
>>   } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
>>   mce->u.ue_error.effective_address_provided = true;
>>   mce->u.ue_error.effective_address = addr;
>> + if (phys_addr != ULONG_MAX) {
>> + mce->u.ue_error.physical_address_provided = true;
>> + mce->u.ue_error.physical_address = phys_addr;
>> + }
>>   }
>>   return;
>>  }
>> diff --git a/arch/powerpc/kernel/mce_power.c 
>> b/arch/powerpc/kernel/mce_power.c
>> index b76ca19..b77a698 100644
>> --- a/arch/powerpc/kernel/mce_power.c
>> +++ b/arch/powerpc/kernel/mce_power.c
>> @@ -27,6 +27,25 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>> +#include 
>> +#include 
>> +
>> +static unsigned long addr_to_pfn(struct mm_struct *mm, unsigned long addr)
>> +{
>> + pte_t *ptep;
>> + unsigned long flags;
>> +
>> + local_irq_save(flags);
>> + if (mm == current->mm)
>> + ptep = find_current_mm_pte(mm->pgd, addr, NULL, NULL);
>> + else
>> + ptep = find_init_mm_pte(addr, NULL);
>> + local_irq_restore(flags);
>> + if (!ptep)
>> + return ULONG_MAX;
>> + return pte_pfn(*ptep);
>
> I think you need to check that it's still cacheable memory here?
> !pte_speical && pfn <= highest_memmap_pfn?
>

find_*pte will return a NULL PTE, so we do have a check there. !pte_special is a
good check to have, I'll add it


>
>> +}
>>
>>  static void flush_tlb_206(unsigned int num_sets, unsigned int action)
>>  {
>> @@ -489,7 +508,8 @@ static int mce_handle_ierror(struct pt_regs *regs,
>>
>>  static int mce_handle_derror(struct pt_regs *regs,
>>   const struct mce_derror_table table[],
>> - struct mce_error_info *mce_err, uint64_t *addr)
>> + struct mce_error_info *mce_err, uint64_t *addr,
>> + uint64_t *phys_addr)
>>  {
>>   uint64_t dsisr = regs->dsisr;
>>   int handled = 0;
>> @@ -555,7 +575,37 @@ static int mce_handle_derror(struct pt_regs *regs,
>>   mce_err->initiator = table[i].initiator;
>>   if (table[i].dar_valid)
>>   *addr = regs->dar;
>> -
>> + else if (mce_err->severity == MCE_SEV_ERROR_SYNC &&
>> + table[i].error_type == MCE_ERROR_TYPE_UE) {
>> + /*
>> +  * Carefully look at the NIP to determine
>> +  * the instruction to analyse. Reading the NIP
>> +  * in real-mode is tricky and can lead to recursive
>> +  * faults
>> +  */
>
> What recursive faults? If you ensure NIP is cacheable memory, I guess you
> can get a recursive machine 

Re: [rfc 2/3] powerpc/mce: Extract physical_address for UE errors

2017-09-05 Thread Nicholas Piggin
On Tue,  5 Sep 2017 14:15:54 +1000
Balbir Singh  wrote:

> Walk the page table for NIP and extract the instruction. Then
> use the instruction to find the effective address via analyse_instr().
> 
> We might have page table walking races, but we expect them to
> be rare, the physical address extraction is best effort. The idea
> is to then hook up this infrastructure to memory failure eventually.

Cool. Too bad hardware doesn't give us the RA.

> 
> Signed-off-by: Balbir Singh 
> ---
>  arch/powerpc/include/asm/mce.h  |  2 +-
>  arch/powerpc/kernel/mce.c   |  6 -
>  arch/powerpc/kernel/mce_power.c | 60 
> +
>  3 files changed, 61 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
> index 75292c7..3a1226e 100644
> --- a/arch/powerpc/include/asm/mce.h
> +++ b/arch/powerpc/include/asm/mce.h
> @@ -204,7 +204,7 @@ struct mce_error_info {
>  
>  extern void save_mce_event(struct pt_regs *regs, long handled,
>  struct mce_error_info *mce_err, uint64_t nip,
> -uint64_t addr);
> +uint64_t addr, uint64_t phys_addr);
>  extern int get_mce_event(struct machine_check_event *mce, bool release);
>  extern void release_mce_event(void);
>  extern void machine_check_queue_event(void);
> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
> index e254399..f41a75d 100644
> --- a/arch/powerpc/kernel/mce.c
> +++ b/arch/powerpc/kernel/mce.c
> @@ -82,7 +82,7 @@ static void mce_set_error_info(struct machine_check_event 
> *mce,
>   */
>  void save_mce_event(struct pt_regs *regs, long handled,
>   struct mce_error_info *mce_err,
> - uint64_t nip, uint64_t addr)
> + uint64_t nip, uint64_t addr, uint64_t phys_addr)
>  {
>   int index = __this_cpu_inc_return(mce_nest_count) - 1;
>   struct machine_check_event *mce = this_cpu_ptr(_event[index]);
> @@ -140,6 +140,10 @@ void save_mce_event(struct pt_regs *regs, long handled,
>   } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
>   mce->u.ue_error.effective_address_provided = true;
>   mce->u.ue_error.effective_address = addr;
> + if (phys_addr != ULONG_MAX) {
> + mce->u.ue_error.physical_address_provided = true;
> + mce->u.ue_error.physical_address = phys_addr;
> + }
>   }
>   return;
>  }
> diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
> index b76ca19..b77a698 100644
> --- a/arch/powerpc/kernel/mce_power.c
> +++ b/arch/powerpc/kernel/mce_power.c
> @@ -27,6 +27,25 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include 
> +#include 
> +
> +static unsigned long addr_to_pfn(struct mm_struct *mm, unsigned long addr)
> +{
> + pte_t *ptep;
> + unsigned long flags;
> +
> + local_irq_save(flags);
> + if (mm == current->mm)
> + ptep = find_current_mm_pte(mm->pgd, addr, NULL, NULL);
> + else
> + ptep = find_init_mm_pte(addr, NULL);
> + local_irq_restore(flags);
> + if (!ptep)
> + return ULONG_MAX;
> + return pte_pfn(*ptep);

I think you need to check that it's still cacheable memory here?
!pte_speical && pfn <= highest_memmap_pfn?


> +}
>  
>  static void flush_tlb_206(unsigned int num_sets, unsigned int action)
>  {
> @@ -489,7 +508,8 @@ static int mce_handle_ierror(struct pt_regs *regs,
>  
>  static int mce_handle_derror(struct pt_regs *regs,
>   const struct mce_derror_table table[],
> - struct mce_error_info *mce_err, uint64_t *addr)
> + struct mce_error_info *mce_err, uint64_t *addr,
> + uint64_t *phys_addr)
>  {
>   uint64_t dsisr = regs->dsisr;
>   int handled = 0;
> @@ -555,7 +575,37 @@ static int mce_handle_derror(struct pt_regs *regs,
>   mce_err->initiator = table[i].initiator;
>   if (table[i].dar_valid)
>   *addr = regs->dar;
> -
> + else if (mce_err->severity == MCE_SEV_ERROR_SYNC &&
> + table[i].error_type == MCE_ERROR_TYPE_UE) {
> + /*
> +  * Carefully look at the NIP to determine
> +  * the instruction to analyse. Reading the NIP
> +  * in real-mode is tricky and can lead to recursive
> +  * faults
> +  */

What recursive faults? If you ensure NIP is cacheable memory, I guess you
can get a recursive machine check from reading it, but that's probably
tolerable.

> + int instr;
> + struct mm_struct *mm;
> + unsigned long nip = regs->nip;
> + unsigned long pfn = 0, instr_addr;
> + struct instruction_op op;
> +

[rfc 2/3] powerpc/mce: Extract physical_address for UE errors

2017-09-04 Thread Balbir Singh
Walk the page table for NIP and extract the instruction. Then
use the instruction to find the effective address via analyse_instr().

We might have page table walking races, but we expect them to
be rare, the physical address extraction is best effort. The idea
is to then hook up this infrastructure to memory failure eventually.

Signed-off-by: Balbir Singh 
---
 arch/powerpc/include/asm/mce.h  |  2 +-
 arch/powerpc/kernel/mce.c   |  6 -
 arch/powerpc/kernel/mce_power.c | 60 +
 3 files changed, 61 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index 75292c7..3a1226e 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -204,7 +204,7 @@ struct mce_error_info {
 
 extern void save_mce_event(struct pt_regs *regs, long handled,
   struct mce_error_info *mce_err, uint64_t nip,
-  uint64_t addr);
+  uint64_t addr, uint64_t phys_addr);
 extern int get_mce_event(struct machine_check_event *mce, bool release);
 extern void release_mce_event(void);
 extern void machine_check_queue_event(void);
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index e254399..f41a75d 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -82,7 +82,7 @@ static void mce_set_error_info(struct machine_check_event 
*mce,
  */
 void save_mce_event(struct pt_regs *regs, long handled,
struct mce_error_info *mce_err,
-   uint64_t nip, uint64_t addr)
+   uint64_t nip, uint64_t addr, uint64_t phys_addr)
 {
int index = __this_cpu_inc_return(mce_nest_count) - 1;
struct machine_check_event *mce = this_cpu_ptr(_event[index]);
@@ -140,6 +140,10 @@ void save_mce_event(struct pt_regs *regs, long handled,
} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
mce->u.ue_error.effective_address_provided = true;
mce->u.ue_error.effective_address = addr;
+   if (phys_addr != ULONG_MAX) {
+   mce->u.ue_error.physical_address_provided = true;
+   mce->u.ue_error.physical_address = phys_addr;
+   }
}
return;
 }
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index b76ca19..b77a698 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -27,6 +27,25 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
+
+static unsigned long addr_to_pfn(struct mm_struct *mm, unsigned long addr)
+{
+   pte_t *ptep;
+   unsigned long flags;
+
+   local_irq_save(flags);
+   if (mm == current->mm)
+   ptep = find_current_mm_pte(mm->pgd, addr, NULL, NULL);
+   else
+   ptep = find_init_mm_pte(addr, NULL);
+   local_irq_restore(flags);
+   if (!ptep)
+   return ULONG_MAX;
+   return pte_pfn(*ptep);
+}
 
 static void flush_tlb_206(unsigned int num_sets, unsigned int action)
 {
@@ -489,7 +508,8 @@ static int mce_handle_ierror(struct pt_regs *regs,
 
 static int mce_handle_derror(struct pt_regs *regs,
const struct mce_derror_table table[],
-   struct mce_error_info *mce_err, uint64_t *addr)
+   struct mce_error_info *mce_err, uint64_t *addr,
+   uint64_t *phys_addr)
 {
uint64_t dsisr = regs->dsisr;
int handled = 0;
@@ -555,7 +575,37 @@ static int mce_handle_derror(struct pt_regs *regs,
mce_err->initiator = table[i].initiator;
if (table[i].dar_valid)
*addr = regs->dar;
-
+   else if (mce_err->severity == MCE_SEV_ERROR_SYNC &&
+   table[i].error_type == MCE_ERROR_TYPE_UE) {
+   /*
+* Carefully look at the NIP to determine
+* the instruction to analyse. Reading the NIP
+* in real-mode is tricky and can lead to recursive
+* faults
+*/
+   int instr;
+   struct mm_struct *mm;
+   unsigned long nip = regs->nip;
+   unsigned long pfn = 0, instr_addr;
+   struct instruction_op op;
+   struct pt_regs tmp = *regs;
+
+   if (user_mode(regs))
+   mm = current->mm;
+   else
+   mm = _mm;
+
+   pfn = addr_to_pfn(mm, nip);
+   if (pfn != ULONG_MAX) {
+   instr_addr = (pfn << PAGE_SHIFT) + (nip & 
~PAGE_MASK);
+   instr = *(unsigned int *)(instr_addr);
+   if (!analyse_instr(, ,