Re: [PATCH RFC v4 2/9] powerpc/powernv/pci: Suppress an EEH error when reading an empty slot
On 3/5/19 9:14 AM, Oliver wrote: > On Sat, Mar 2, 2019 at 3:04 AM Sergey Miroshnichenko > wrote: >> >> Reading an empty slot returns all ones, which triggers a false >> EEH error event on PowerNV. This patch unfreezes the bus where >> it has happened. >> >> Signed-off-by: Sergey Miroshnichenko >> --- >> arch/powerpc/include/asm/ppc-pci.h | 1 + >> arch/powerpc/kernel/pci_dn.c | 2 +- >> arch/powerpc/platforms/powernv/pci.c | 34 >> 3 files changed, 32 insertions(+), 5 deletions(-) >> >> diff --git a/arch/powerpc/include/asm/ppc-pci.h >> b/arch/powerpc/include/asm/ppc-pci.h >> index f67da277d652..737393c54f58 100644 >> --- a/arch/powerpc/include/asm/ppc-pci.h >> +++ b/arch/powerpc/include/asm/ppc-pci.h >> @@ -40,6 +40,7 @@ void *traverse_pci_dn(struct pci_dn *root, >> void *(*fn)(struct pci_dn *, void *), >> void *data); >> extern void pci_devs_phb_init_dynamic(struct pci_controller *phb); >> +struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus); >> >> /* From rtas_pci.h */ >> extern void init_pci_config_tokens (void); >> diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c >> index ab147a1909c8..341ed71250f1 100644 >> --- a/arch/powerpc/kernel/pci_dn.c >> +++ b/arch/powerpc/kernel/pci_dn.c >> @@ -40,7 +40,7 @@ >> * one of PF's bridge. For other devices, their firmware >> * data is linked to that of their bridge. 
>> */ >> -static struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus) >> +struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus) >> { >> struct pci_bus *pbus; >> struct device_node *dn; >> diff --git a/arch/powerpc/platforms/powernv/pci.c >> b/arch/powerpc/platforms/powernv/pci.c >> index 3260250d2029..73c2d0aed996 100644 >> --- a/arch/powerpc/platforms/powernv/pci.c >> +++ b/arch/powerpc/platforms/powernv/pci.c >> @@ -761,6 +761,21 @@ static inline pnv_pci_cfg_check(struct pci_dn *pdn) >> } >> #endif /* CONFIG_EEH */ >> >> +static int get_bus_pe_number(struct pci_bus *bus) >> +{ >> + struct pci_dn *pdn = pci_bus_to_pdn(bus); >> + struct pci_dn *child; >> + >> + if (!pdn) >> + return IODA_INVALID_PE; >> + >> + list_for_each_entry(child, &pdn->child_list, list) >> + if (child->pe_number != IODA_INVALID_PE) >> + return child->pe_number; >> + >> + return IODA_INVALID_PE; >> +} >> + >> static int pnv_pci_read_config(struct pci_bus *bus, >>unsigned int devfn, >>int where, int size, u32 *val) >> @@ -769,12 +784,23 @@ static int pnv_pci_read_config(struct pci_bus *bus, >> struct pci_controller *hose = pci_bus_to_host(bus); >> struct pnv_phb *phb = hose->private_data; >> int ret; >> + u32 empty_val = 0xFFFFFFFF; >> >> - *val = 0xFFFFFFFF; >> + *val = empty_val; >> pdn = pci_get_pdn_by_devfn(bus, devfn); >> - if (!pdn) >> - return pnv_pci_cfg_read_raw(phb->opal_id, bus->number, devfn, >> - where, size, val); >> + if (!pdn) { >> + int pe_number = get_bus_pe_number(bus); >> + >> + ret = pnv_pci_cfg_read_raw(phb->opal_id, bus->number, devfn, >> + where, size, val); >> + >> + if (!ret && (*val == empty_val) && phb->unfreeze_pe) > > Does this empty_val check work when using 1 or 2 byte cfg accesses? > That was intentional because 0xff and 0xffff are valid values, but 0xffffffff is the only reliable sign of an empty slot. And the kernel probes a slot via the pci_bus_generic_read_dev_vendor_id() function, which in turn calls pci_bus_read_config_dword(PCI_VENDOR_ID). 
But I haven't actually tried reading 1-2 bytes from an empty slot to test whether that triggers an EEH. If it does, I'll change that to EEH_IO_ERROR_VALUE(size). >> + phb->unfreeze_pe(phb, (pe_number == IODA_INVALID_PE) >> ? >> +0xff : pe_number, > > Use phb->ioda.reserved_pe_idx rather than guessing that 0xff is safe > to use. On P9 we have PHBs with 512 PEs and some older P8 firmware > releases used 0 as the reserved PE rather than 0xff. > Thanks for the catch! I'll fix that in v5. Best regards, Serge >> +OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); >> + >> + return ret; >> + } >> >> if (!pnv_pci_cfg_check(pdn)) >> return PCIBIOS_DEVICE_NOT_FOUND; >> -- >> 2.20.1 >> signature.asc Description: OpenPGP digital signature
Re: [PATCH RFC v4 2/9] powerpc/powernv/pci: Suppress an EEH error when reading an empty slot
On Sat, Mar 2, 2019 at 3:04 AM Sergey Miroshnichenko wrote: > > Reading an empty slot returns all ones, which triggers a false > EEH error event on PowerNV. This patch unfreezes the bus where > it has happened. > > Signed-off-by: Sergey Miroshnichenko > --- > arch/powerpc/include/asm/ppc-pci.h | 1 + > arch/powerpc/kernel/pci_dn.c | 2 +- > arch/powerpc/platforms/powernv/pci.c | 34 > 3 files changed, 32 insertions(+), 5 deletions(-) > > diff --git a/arch/powerpc/include/asm/ppc-pci.h > b/arch/powerpc/include/asm/ppc-pci.h > index f67da277d652..737393c54f58 100644 > --- a/arch/powerpc/include/asm/ppc-pci.h > +++ b/arch/powerpc/include/asm/ppc-pci.h > @@ -40,6 +40,7 @@ void *traverse_pci_dn(struct pci_dn *root, > void *(*fn)(struct pci_dn *, void *), > void *data); > extern void pci_devs_phb_init_dynamic(struct pci_controller *phb); > +struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus); > > /* From rtas_pci.h */ > extern void init_pci_config_tokens (void); > diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c > index ab147a1909c8..341ed71250f1 100644 > --- a/arch/powerpc/kernel/pci_dn.c > +++ b/arch/powerpc/kernel/pci_dn.c > @@ -40,7 +40,7 @@ > * one of PF's bridge. For other devices, their firmware > * data is linked to that of their bridge. 
> */ > -static struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus) > +struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus) > { > struct pci_bus *pbus; > struct device_node *dn; > diff --git a/arch/powerpc/platforms/powernv/pci.c > b/arch/powerpc/platforms/powernv/pci.c > index 3260250d2029..73c2d0aed996 100644 > --- a/arch/powerpc/platforms/powernv/pci.c > +++ b/arch/powerpc/platforms/powernv/pci.c > @@ -761,6 +761,21 @@ static inline pnv_pci_cfg_check(struct pci_dn *pdn) > } > #endif /* CONFIG_EEH */ > > +static int get_bus_pe_number(struct pci_bus *bus) > +{ > + struct pci_dn *pdn = pci_bus_to_pdn(bus); > + struct pci_dn *child; > + > + if (!pdn) > + return IODA_INVALID_PE; > + > + list_for_each_entry(child, &pdn->child_list, list) > + if (child->pe_number != IODA_INVALID_PE) > + return child->pe_number; > + > + return IODA_INVALID_PE; > +} > + > static int pnv_pci_read_config(struct pci_bus *bus, >unsigned int devfn, >int where, int size, u32 *val) > @@ -769,12 +784,23 @@ static int pnv_pci_read_config(struct pci_bus *bus, > struct pci_controller *hose = pci_bus_to_host(bus); > struct pnv_phb *phb = hose->private_data; > int ret; > + u32 empty_val = 0xFFFFFFFF; > > - *val = 0xFFFFFFFF; > + *val = empty_val; > pdn = pci_get_pdn_by_devfn(bus, devfn); > - if (!pdn) > - return pnv_pci_cfg_read_raw(phb->opal_id, bus->number, devfn, > - where, size, val); > + if (!pdn) { > + int pe_number = get_bus_pe_number(bus); > + > + ret = pnv_pci_cfg_read_raw(phb->opal_id, bus->number, devfn, > + where, size, val); > + > + if (!ret && (*val == empty_val) && phb->unfreeze_pe) Does this empty_val check work when using 1 or 2 byte cfg accesses? > + phb->unfreeze_pe(phb, (pe_number == IODA_INVALID_PE) ? > +0xff : pe_number, Use phb->ioda.reserved_pe_idx rather than guessing that 0xff is safe to use. On P9 we have PHBs with 512 PEs and some older P8 firmware releases used 0 as the reserved PE rather than 0xff. 
> +OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); > + > + return ret; > + } > > if (!pnv_pci_cfg_check(pdn)) > return PCIBIOS_DEVICE_NOT_FOUND; > -- > 2.20.1 >
[PATCH RFC v4 2/9] powerpc/powernv/pci: Suppress an EEH error when reading an empty slot
Reading an empty slot returns all ones, which triggers a false EEH error event on PowerNV. This patch unfreezes the bus where it has happened. Signed-off-by: Sergey Miroshnichenko --- arch/powerpc/include/asm/ppc-pci.h | 1 + arch/powerpc/kernel/pci_dn.c | 2 +- arch/powerpc/platforms/powernv/pci.c | 34 3 files changed, 32 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index f67da277d652..737393c54f58 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -40,6 +40,7 @@ void *traverse_pci_dn(struct pci_dn *root, void *(*fn)(struct pci_dn *, void *), void *data); extern void pci_devs_phb_init_dynamic(struct pci_controller *phb); +struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus); /* From rtas_pci.h */ extern void init_pci_config_tokens (void); diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index ab147a1909c8..341ed71250f1 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -40,7 +40,7 @@ * one of PF's bridge. For other devices, their firmware * data is linked to that of their bridge. 
*/ -static struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus) +struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus) { struct pci_bus *pbus; struct device_node *dn; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 3260250d2029..73c2d0aed996 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -761,6 +761,21 @@ static inline pnv_pci_cfg_check(struct pci_dn *pdn) } #endif /* CONFIG_EEH */ +static int get_bus_pe_number(struct pci_bus *bus) +{ + struct pci_dn *pdn = pci_bus_to_pdn(bus); + struct pci_dn *child; + + if (!pdn) + return IODA_INVALID_PE; + + list_for_each_entry(child, &pdn->child_list, list) + if (child->pe_number != IODA_INVALID_PE) + return child->pe_number; + + return IODA_INVALID_PE; +} + static int pnv_pci_read_config(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *val) @@ -769,12 +784,23 @@ static int pnv_pci_read_config(struct pci_bus *bus, struct pci_controller *hose = pci_bus_to_host(bus); struct pnv_phb *phb = hose->private_data; int ret; + u32 empty_val = 0xFFFFFFFF; - *val = 0xFFFFFFFF; + *val = empty_val; pdn = pci_get_pdn_by_devfn(bus, devfn); - if (!pdn) - return pnv_pci_cfg_read_raw(phb->opal_id, bus->number, devfn, - where, size, val); + if (!pdn) { + int pe_number = get_bus_pe_number(bus); + + ret = pnv_pci_cfg_read_raw(phb->opal_id, bus->number, devfn, + where, size, val); + + if (!ret && (*val == empty_val) && phb->unfreeze_pe) + phb->unfreeze_pe(phb, (pe_number == IODA_INVALID_PE) ? +0xff : pe_number, +OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + + return ret; + } if (!pnv_pci_cfg_check(pdn)) return PCIBIOS_DEVICE_NOT_FOUND; -- 2.20.1