Re: [rfc 2/3] powerpc/mce: Extract physical_address for UE errors
On 09/05/2017 09:45 AM, Balbir Singh wrote: > Walk the page table for NIP and extract the instruction. Then > use the instruction to find the effective address via analyse_instr(). > > We might have page table walking races, but we expect them to > be rare, the physical address extraction is best effort. The idea > is to then hook up this infrastructure to memory failure eventually. > > Signed-off-by: Balbir Singh> --- > arch/powerpc/include/asm/mce.h | 2 +- > arch/powerpc/kernel/mce.c | 6 - > arch/powerpc/kernel/mce_power.c | 60 > + > 3 files changed, 61 insertions(+), 7 deletions(-) > > diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h > index 75292c7..3a1226e 100644 > --- a/arch/powerpc/include/asm/mce.h > +++ b/arch/powerpc/include/asm/mce.h > @@ -204,7 +204,7 @@ struct mce_error_info { > > extern void save_mce_event(struct pt_regs *regs, long handled, > struct mce_error_info *mce_err, uint64_t nip, > -uint64_t addr); > +uint64_t addr, uint64_t phys_addr); > extern int get_mce_event(struct machine_check_event *mce, bool release); > extern void release_mce_event(void); > extern void machine_check_queue_event(void); > diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c > index e254399..f41a75d 100644 > --- a/arch/powerpc/kernel/mce.c > +++ b/arch/powerpc/kernel/mce.c > @@ -82,7 +82,7 @@ static void mce_set_error_info(struct machine_check_event > *mce, > */ > void save_mce_event(struct pt_regs *regs, long handled, > struct mce_error_info *mce_err, > - uint64_t nip, uint64_t addr) > + uint64_t nip, uint64_t addr, uint64_t phys_addr) > { > int index = __this_cpu_inc_return(mce_nest_count) - 1; > struct machine_check_event *mce = this_cpu_ptr(_event[index]); > @@ -140,6 +140,10 @@ void save_mce_event(struct pt_regs *regs, long handled, > } else if (mce->error_type == MCE_ERROR_TYPE_UE) { > mce->u.ue_error.effective_address_provided = true; > mce->u.ue_error.effective_address = addr; > + if (phys_addr != ULONG_MAX) { > + 
mce->u.ue_error.physical_address_provided = true; > + mce->u.ue_error.physical_address = phys_addr; > + } > } > return; > } > diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c > index b76ca19..b77a698 100644 > --- a/arch/powerpc/kernel/mce_power.c > +++ b/arch/powerpc/kernel/mce_power.c > @@ -27,6 +27,25 @@ > #include > #include > #include > +#include > +#include > +#include > + > +static unsigned long addr_to_pfn(struct mm_struct *mm, unsigned long addr) > +{ > + pte_t *ptep; > + unsigned long flags; > + > + local_irq_save(flags); > + if (mm == current->mm) > + ptep = find_current_mm_pte(mm->pgd, addr, NULL, NULL); > + else > + ptep = find_init_mm_pte(addr, NULL); > + local_irq_restore(flags); > + if (!ptep) > + return ULONG_MAX; > + return pte_pfn(*ptep); > +} > > static void flush_tlb_206(unsigned int num_sets, unsigned int action) > { > @@ -489,7 +508,8 @@ static int mce_handle_ierror(struct pt_regs *regs, > > static int mce_handle_derror(struct pt_regs *regs, > const struct mce_derror_table table[], > - struct mce_error_info *mce_err, uint64_t *addr) > + struct mce_error_info *mce_err, uint64_t *addr, > + uint64_t *phys_addr) > { > uint64_t dsisr = regs->dsisr; > int handled = 0; > @@ -555,7 +575,37 @@ static int mce_handle_derror(struct pt_regs *regs, > mce_err->initiator = table[i].initiator; > if (table[i].dar_valid) > *addr = regs->dar; > - > + else if (mce_err->severity == MCE_SEV_ERROR_SYNC && > + table[i].error_type == MCE_ERROR_TYPE_UE) { > + /* > + * Carefully look at the NIP to determine > + * the instruction to analyse. 
Reading the NIP > + * in real-mode is tricky and can lead to recursive > + * faults > + */ > + int instr; > + struct mm_struct *mm; > + unsigned long nip = regs->nip; > + unsigned long pfn = 0, instr_addr; > + struct instruction_op op; > + struct pt_regs tmp = *regs; > + > + if (user_mode(regs)) > + mm = current->mm; > + else > + mm = &init_mm; > + > + pfn = addr_to_pfn(mm, nip); > + if (pfn != ULONG_MAX) { > + instr_addr = (pfn << PAGE_SHIFT) + (nip & ~PAGE_MASK);
Re: [rfc 2/3] powerpc/mce: Extract physical_address for UE errors
On Thu, Sep 7, 2017 at 8:56 AM, Benjamin Herrenschmidt wrote: > On Tue, 2017-09-05 at 14:15 +1000, Balbir Singh wrote: >> void save_mce_event(struct pt_regs *regs, long handled, >> struct mce_error_info *mce_err, >> - uint64_t nip, uint64_t addr) >> + uint64_t nip, uint64_t addr, uint64_t phys_addr) >> { >> int index = __this_cpu_inc_return(mce_nest_count) - 1; >> struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]); >> @@ -140,6 +140,10 @@ void save_mce_event(struct pt_regs *regs, long handled, >> } else if (mce->error_type == MCE_ERROR_TYPE_UE) { >> mce->u.ue_error.effective_address_provided = true; >> mce->u.ue_error.effective_address = addr; >> + if (phys_addr != ULONG_MAX) { >> + mce->u.ue_error.physical_address_provided = true; >> + mce->u.ue_error.physical_address = phys_addr; >> + } >> } >> return; > > Where is "addr" coming from ? Keep in mind that on P9 at least, a UE > will *not* give you an EA in DAR in most cases. The EA is derived in mce_handle_derror() via page table walk and analyse_instr(), it's the best we can do FWIK Balbir Singh.
Re: [rfc 2/3] powerpc/mce: Extract physical_address for UE errors
On Tue, 2017-09-05 at 14:15 +1000, Balbir Singh wrote: > void save_mce_event(struct pt_regs *regs, long handled, > struct mce_error_info *mce_err, > - uint64_t nip, uint64_t addr) > + uint64_t nip, uint64_t addr, uint64_t phys_addr) > { > int index = __this_cpu_inc_return(mce_nest_count) - 1; > struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]); > @@ -140,6 +140,10 @@ void save_mce_event(struct pt_regs *regs, long handled, > } else if (mce->error_type == MCE_ERROR_TYPE_UE) { > mce->u.ue_error.effective_address_provided = true; > mce->u.ue_error.effective_address = addr; > + if (phys_addr != ULONG_MAX) { > + mce->u.ue_error.physical_address_provided = true; > + mce->u.ue_error.physical_address = phys_addr; > + } > } > return; Where is "addr" coming from ? Keep in mind that on P9 at least, a UE will *not* give you an EA in DAR in most cases. Ben.
Re: [rfc 2/3] powerpc/mce: Extract physical_address for UE errors
On Wed, Sep 6, 2017 at 10:36 AM, Nicholas Pigginwrote: > On Tue, 5 Sep 2017 14:15:54 +1000 > Balbir Singh wrote: > >> Walk the page table for NIP and extract the instruction. Then >> use the instruction to find the effective address via analyse_instr(). >> >> We might have page table walking races, but we expect them to >> be rare, the physical address extraction is best effort. The idea >> is to then hook up this infrastructure to memory failure eventually. > > Cool. Too bad hardware doesn't give us the RA. > >> >> Signed-off-by: Balbir Singh >> --- >> arch/powerpc/include/asm/mce.h | 2 +- >> arch/powerpc/kernel/mce.c | 6 - >> arch/powerpc/kernel/mce_power.c | 60 >> + >> 3 files changed, 61 insertions(+), 7 deletions(-) >> >> diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h >> index 75292c7..3a1226e 100644 >> --- a/arch/powerpc/include/asm/mce.h >> +++ b/arch/powerpc/include/asm/mce.h >> @@ -204,7 +204,7 @@ struct mce_error_info { >> >> extern void save_mce_event(struct pt_regs *regs, long handled, >> struct mce_error_info *mce_err, uint64_t nip, >> -uint64_t addr); >> +uint64_t addr, uint64_t phys_addr); >> extern int get_mce_event(struct machine_check_event *mce, bool release); >> extern void release_mce_event(void); >> extern void machine_check_queue_event(void); >> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c >> index e254399..f41a75d 100644 >> --- a/arch/powerpc/kernel/mce.c >> +++ b/arch/powerpc/kernel/mce.c >> @@ -82,7 +82,7 @@ static void mce_set_error_info(struct machine_check_event >> *mce, >> */ >> void save_mce_event(struct pt_regs *regs, long handled, >> struct mce_error_info *mce_err, >> - uint64_t nip, uint64_t addr) >> + uint64_t nip, uint64_t addr, uint64_t phys_addr) >> { >> int index = __this_cpu_inc_return(mce_nest_count) - 1; >> struct machine_check_event *mce = this_cpu_ptr(_event[index]); >> @@ -140,6 +140,10 @@ void save_mce_event(struct pt_regs *regs, long handled, >> } else if 
(mce->error_type == MCE_ERROR_TYPE_UE) { >> mce->u.ue_error.effective_address_provided = true; >> mce->u.ue_error.effective_address = addr; >> + if (phys_addr != ULONG_MAX) { >> + mce->u.ue_error.physical_address_provided = true; >> + mce->u.ue_error.physical_address = phys_addr; >> + } >> } >> return; >> } >> diff --git a/arch/powerpc/kernel/mce_power.c >> b/arch/powerpc/kernel/mce_power.c >> index b76ca19..b77a698 100644 >> --- a/arch/powerpc/kernel/mce_power.c >> +++ b/arch/powerpc/kernel/mce_power.c >> @@ -27,6 +27,25 @@ >> #include >> #include >> #include >> +#include >> +#include >> +#include >> + >> +static unsigned long addr_to_pfn(struct mm_struct *mm, unsigned long addr) >> +{ >> + pte_t *ptep; >> + unsigned long flags; >> + >> + local_irq_save(flags); >> + if (mm == current->mm) >> + ptep = find_current_mm_pte(mm->pgd, addr, NULL, NULL); >> + else >> + ptep = find_init_mm_pte(addr, NULL); >> + local_irq_restore(flags); >> + if (!ptep) >> + return ULONG_MAX; >> + return pte_pfn(*ptep); > > I think you need to check that it's still cacheable memory here? > !pte_speical && pfn <= highest_memmap_pfn? > find_*pte will return a NULL PTE, so we do have a check there. 
!pte_special is a good check to have, I'll add it > >> +} >> >> static void flush_tlb_206(unsigned int num_sets, unsigned int action) >> { >> @@ -489,7 +508,8 @@ static int mce_handle_ierror(struct pt_regs *regs, >> >> static int mce_handle_derror(struct pt_regs *regs, >> const struct mce_derror_table table[], >> - struct mce_error_info *mce_err, uint64_t *addr) >> + struct mce_error_info *mce_err, uint64_t *addr, >> + uint64_t *phys_addr) >> { >> uint64_t dsisr = regs->dsisr; >> int handled = 0; >> @@ -555,7 +575,37 @@ static int mce_handle_derror(struct pt_regs *regs, >> mce_err->initiator = table[i].initiator; >> if (table[i].dar_valid) >> *addr = regs->dar; >> - >> + else if (mce_err->severity == MCE_SEV_ERROR_SYNC && >> + table[i].error_type == MCE_ERROR_TYPE_UE) { >> + /* >> + * Carefully look at the NIP to determine >> + * the instruction to analyse. Reading the NIP >> + * in real-mode is tricky and can lead to recursive >> + * faults >> + */ > > What recursive faults? If you ensure NIP is cacheable memory, I guess you > can get a recursive machine
Re: [rfc 2/3] powerpc/mce: Extract physical_address for UE errors
On Tue, 5 Sep 2017 14:15:54 +1000 Balbir Singhwrote: > Walk the page table for NIP and extract the instruction. Then > use the instruction to find the effective address via analyse_instr(). > > We might have page table walking races, but we expect them to > be rare, the physical address extraction is best effort. The idea > is to then hook up this infrastructure to memory failure eventually. Cool. Too bad hardware doesn't give us the RA. > > Signed-off-by: Balbir Singh > --- > arch/powerpc/include/asm/mce.h | 2 +- > arch/powerpc/kernel/mce.c | 6 - > arch/powerpc/kernel/mce_power.c | 60 > + > 3 files changed, 61 insertions(+), 7 deletions(-) > > diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h > index 75292c7..3a1226e 100644 > --- a/arch/powerpc/include/asm/mce.h > +++ b/arch/powerpc/include/asm/mce.h > @@ -204,7 +204,7 @@ struct mce_error_info { > > extern void save_mce_event(struct pt_regs *regs, long handled, > struct mce_error_info *mce_err, uint64_t nip, > -uint64_t addr); > +uint64_t addr, uint64_t phys_addr); > extern int get_mce_event(struct machine_check_event *mce, bool release); > extern void release_mce_event(void); > extern void machine_check_queue_event(void); > diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c > index e254399..f41a75d 100644 > --- a/arch/powerpc/kernel/mce.c > +++ b/arch/powerpc/kernel/mce.c > @@ -82,7 +82,7 @@ static void mce_set_error_info(struct machine_check_event > *mce, > */ > void save_mce_event(struct pt_regs *regs, long handled, > struct mce_error_info *mce_err, > - uint64_t nip, uint64_t addr) > + uint64_t nip, uint64_t addr, uint64_t phys_addr) > { > int index = __this_cpu_inc_return(mce_nest_count) - 1; > struct machine_check_event *mce = this_cpu_ptr(_event[index]); > @@ -140,6 +140,10 @@ void save_mce_event(struct pt_regs *regs, long handled, > } else if (mce->error_type == MCE_ERROR_TYPE_UE) { > mce->u.ue_error.effective_address_provided = true; > 
mce->u.ue_error.effective_address = addr; > + if (phys_addr != ULONG_MAX) { > + mce->u.ue_error.physical_address_provided = true; > + mce->u.ue_error.physical_address = phys_addr; > + } > } > return; > } > diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c > index b76ca19..b77a698 100644 > --- a/arch/powerpc/kernel/mce_power.c > +++ b/arch/powerpc/kernel/mce_power.c > @@ -27,6 +27,25 @@ > #include > #include > #include > +#include > +#include > +#include > + > +static unsigned long addr_to_pfn(struct mm_struct *mm, unsigned long addr) > +{ > + pte_t *ptep; > + unsigned long flags; > + > + local_irq_save(flags); > + if (mm == current->mm) > + ptep = find_current_mm_pte(mm->pgd, addr, NULL, NULL); > + else > + ptep = find_init_mm_pte(addr, NULL); > + local_irq_restore(flags); > + if (!ptep) > + return ULONG_MAX; > + return pte_pfn(*ptep); I think you need to check that it's still cacheable memory here? !pte_speical && pfn <= highest_memmap_pfn? > +} > > static void flush_tlb_206(unsigned int num_sets, unsigned int action) > { > @@ -489,7 +508,8 @@ static int mce_handle_ierror(struct pt_regs *regs, > > static int mce_handle_derror(struct pt_regs *regs, > const struct mce_derror_table table[], > - struct mce_error_info *mce_err, uint64_t *addr) > + struct mce_error_info *mce_err, uint64_t *addr, > + uint64_t *phys_addr) > { > uint64_t dsisr = regs->dsisr; > int handled = 0; > @@ -555,7 +575,37 @@ static int mce_handle_derror(struct pt_regs *regs, > mce_err->initiator = table[i].initiator; > if (table[i].dar_valid) > *addr = regs->dar; > - > + else if (mce_err->severity == MCE_SEV_ERROR_SYNC && > + table[i].error_type == MCE_ERROR_TYPE_UE) { > + /* > + * Carefully look at the NIP to determine > + * the instruction to analyse. Reading the NIP > + * in real-mode is tricky and can lead to recursive > + * faults > + */ What recursive faults? 
If you ensure NIP is cacheable memory, I guess you can get a recursive machine check from reading it, but that's probably tolerable. > + int instr; > + struct mm_struct *mm; > + unsigned long nip = regs->nip; > + unsigned long pfn = 0, instr_addr; > + struct instruction_op op; > +
[rfc 2/3] powerpc/mce: Extract physical_address for UE errors
Walk the page table for NIP and extract the instruction. Then use the instruction to find the effective address via analyse_instr(). We might have page table walking races, but we expect them to be rare, the physical address extraction is best effort. The idea is to then hook up this infrastructure to memory failure eventually. Signed-off-by: Balbir Singh--- arch/powerpc/include/asm/mce.h | 2 +- arch/powerpc/kernel/mce.c | 6 - arch/powerpc/kernel/mce_power.c | 60 + 3 files changed, 61 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index 75292c7..3a1226e 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -204,7 +204,7 @@ struct mce_error_info { extern void save_mce_event(struct pt_regs *regs, long handled, struct mce_error_info *mce_err, uint64_t nip, - uint64_t addr); + uint64_t addr, uint64_t phys_addr); extern int get_mce_event(struct machine_check_event *mce, bool release); extern void release_mce_event(void); extern void machine_check_queue_event(void); diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index e254399..f41a75d 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -82,7 +82,7 @@ static void mce_set_error_info(struct machine_check_event *mce, */ void save_mce_event(struct pt_regs *regs, long handled, struct mce_error_info *mce_err, - uint64_t nip, uint64_t addr) + uint64_t nip, uint64_t addr, uint64_t phys_addr) { int index = __this_cpu_inc_return(mce_nest_count) - 1; struct machine_check_event *mce = this_cpu_ptr(_event[index]); @@ -140,6 +140,10 @@ void save_mce_event(struct pt_regs *regs, long handled, } else if (mce->error_type == MCE_ERROR_TYPE_UE) { mce->u.ue_error.effective_address_provided = true; mce->u.ue_error.effective_address = addr; + if (phys_addr != ULONG_MAX) { + mce->u.ue_error.physical_address_provided = true; + mce->u.ue_error.physical_address = phys_addr; + } } return; } diff --git 
a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index b76ca19..b77a698 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -27,6 +27,25 @@ #include #include #include +#include +#include +#include + +static unsigned long addr_to_pfn(struct mm_struct *mm, unsigned long addr) +{ + pte_t *ptep; + unsigned long flags; + + local_irq_save(flags); + if (mm == current->mm) + ptep = find_current_mm_pte(mm->pgd, addr, NULL, NULL); + else + ptep = find_init_mm_pte(addr, NULL); + local_irq_restore(flags); + if (!ptep) + return ULONG_MAX; + return pte_pfn(*ptep); +} static void flush_tlb_206(unsigned int num_sets, unsigned int action) { @@ -489,7 +508,8 @@ static int mce_handle_ierror(struct pt_regs *regs, static int mce_handle_derror(struct pt_regs *regs, const struct mce_derror_table table[], - struct mce_error_info *mce_err, uint64_t *addr) + struct mce_error_info *mce_err, uint64_t *addr, + uint64_t *phys_addr) { uint64_t dsisr = regs->dsisr; int handled = 0; @@ -555,7 +575,37 @@ static int mce_handle_derror(struct pt_regs *regs, mce_err->initiator = table[i].initiator; if (table[i].dar_valid) *addr = regs->dar; - + else if (mce_err->severity == MCE_SEV_ERROR_SYNC && + table[i].error_type == MCE_ERROR_TYPE_UE) { + /* +* Carefully look at the NIP to determine +* the instruction to analyse. Reading the NIP +* in real-mode is tricky and can lead to recursive +* faults +*/ + int instr; + struct mm_struct *mm; + unsigned long nip = regs->nip; + unsigned long pfn = 0, instr_addr; + struct instruction_op op; + struct pt_regs tmp = *regs; + + if (user_mode(regs)) + mm = current->mm; + else + mm = _mm; + + pfn = addr_to_pfn(mm, nip); + if (pfn != ULONG_MAX) { + instr_addr = (pfn << PAGE_SHIFT) + (nip & ~PAGE_MASK); + instr = *(unsigned int *)(instr_addr); + if (!analyse_instr(, ,