Re: [PATCH RFC v4 2/9] powerpc/powernv/pci: Suppress an EEH error when reading an empty slot

2019-03-05 Thread Sergey Miroshnichenko


On 3/5/19 9:14 AM, Oliver wrote:
> On Sat, Mar 2, 2019 at 3:04 AM Sergey Miroshnichenko
>  wrote:
>>
>> Reading an empty slot returns all ones, which triggers a false
>> EEH error event on PowerNV. This patch unfreezes the bus where
>> it has happened.
>>
>> Signed-off-by: Sergey Miroshnichenko 
>> ---
>>  arch/powerpc/include/asm/ppc-pci.h   |  1 +
>>  arch/powerpc/kernel/pci_dn.c |  2 +-
>>  arch/powerpc/platforms/powernv/pci.c | 34 
>>  3 files changed, 32 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/ppc-pci.h 
>> b/arch/powerpc/include/asm/ppc-pci.h
>> index f67da277d652..737393c54f58 100644
>> --- a/arch/powerpc/include/asm/ppc-pci.h
>> +++ b/arch/powerpc/include/asm/ppc-pci.h
>> @@ -40,6 +40,7 @@ void *traverse_pci_dn(struct pci_dn *root,
>>   void *(*fn)(struct pci_dn *, void *),
>>   void *data);
>>  extern void pci_devs_phb_init_dynamic(struct pci_controller *phb);
>> +struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus);
>>
>>  /* From rtas_pci.h */
>>  extern void init_pci_config_tokens (void);
>> diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
>> index ab147a1909c8..341ed71250f1 100644
>> --- a/arch/powerpc/kernel/pci_dn.c
>> +++ b/arch/powerpc/kernel/pci_dn.c
>> @@ -40,7 +40,7 @@
>>   * one of PF's bridge. For other devices, their firmware
>>   * data is linked to that of their bridge.
>>   */
>> -static struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus)
>> +struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus)
>>  {
>> struct pci_bus *pbus;
>> struct device_node *dn;
>> diff --git a/arch/powerpc/platforms/powernv/pci.c 
>> b/arch/powerpc/platforms/powernv/pci.c
>> index 3260250d2029..73c2d0aed996 100644
>> --- a/arch/powerpc/platforms/powernv/pci.c
>> +++ b/arch/powerpc/platforms/powernv/pci.c
>> @@ -761,6 +761,21 @@ static inline pnv_pci_cfg_check(struct pci_dn *pdn)
>>  }
>>  #endif /* CONFIG_EEH */
>>
>> +static int get_bus_pe_number(struct pci_bus *bus)
>> +{
>> +   struct pci_dn *pdn = pci_bus_to_pdn(bus);
>> +   struct pci_dn *child;
>> +
>> +   if (!pdn)
>> +   return IODA_INVALID_PE;
>> +
>> +   list_for_each_entry(child, &pdn->child_list, list)
>> +   if (child->pe_number != IODA_INVALID_PE)
>> +   return child->pe_number;
>> +
>> +   return IODA_INVALID_PE;
>> +}
>> +
>>  static int pnv_pci_read_config(struct pci_bus *bus,
>>unsigned int devfn,
>>int where, int size, u32 *val)
>> @@ -769,12 +784,23 @@ static int pnv_pci_read_config(struct pci_bus *bus,
>> struct pci_controller *hose = pci_bus_to_host(bus);
>> struct pnv_phb *phb = hose->private_data;
>> int ret;
>> +   u32 empty_val = 0xffffffff;
>>
>> -   *val = 0xffffffff;
>> +   *val = empty_val;
>> pdn = pci_get_pdn_by_devfn(bus, devfn);
>> -   if (!pdn)
>> -   return pnv_pci_cfg_read_raw(phb->opal_id, bus->number, devfn,
>> -   where, size, val);
>> +   if (!pdn) {
>> +   int pe_number = get_bus_pe_number(bus);
>> +
>> +   ret = pnv_pci_cfg_read_raw(phb->opal_id, bus->number, devfn,
>> +  where, size, val);
>> +
>> +   if (!ret && (*val == empty_val) && phb->unfreeze_pe)
> 
> Does this empty val check work when using 1 or 2 byte cfg accesses?
> 

That was intentional because 0xff and 0xffff are valid values, but
0xffffffff is the only reliable sign of an empty slot. And the kernel
pokes a slot via the pci_bus_generic_read_dev_vendor_id() function, which
in turn calls pci_bus_read_config_dword(PCI_VENDOR_ID).

But I haven't actually tried reading 1-2 bytes from an empty slot to
test whether that triggers an EEH. If it does, I'll change that to
EEH_IO_ERROR_VALUE(size).

>> +   phb->unfreeze_pe(phb, (pe_number == IODA_INVALID_PE) 
>> ?
>> +0xff : pe_number,
> 
> Use phb->ioda.reserved_pe_idx rather than guessing that 0xff is safe
> to use. On P9 we have PHBs with 512 PEs and some older P8 firmware
> releases used 0 as the reserved PE rather than 0xff.
> 

Thanks for the catch! I'll fix that in v5.

Best regards,
Serge

>> +OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
>> +
>> +   return ret;
>> +   }
>>
>> if (!pnv_pci_cfg_check(pdn))
>> return PCIBIOS_DEVICE_NOT_FOUND;
>> --
>> 2.20.1
>>



signature.asc
Description: OpenPGP digital signature


Re: [PATCH RFC v4 2/9] powerpc/powernv/pci: Suppress an EEH error when reading an empty slot

2019-03-04 Thread Oliver
On Sat, Mar 2, 2019 at 3:04 AM Sergey Miroshnichenko
 wrote:
>
> Reading an empty slot returns all ones, which triggers a false
> EEH error event on PowerNV. This patch unfreezes the bus where
> it has happened.
>
> Signed-off-by: Sergey Miroshnichenko 
> ---
>  arch/powerpc/include/asm/ppc-pci.h   |  1 +
>  arch/powerpc/kernel/pci_dn.c |  2 +-
>  arch/powerpc/platforms/powernv/pci.c | 34 
>  3 files changed, 32 insertions(+), 5 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/ppc-pci.h 
> b/arch/powerpc/include/asm/ppc-pci.h
> index f67da277d652..737393c54f58 100644
> --- a/arch/powerpc/include/asm/ppc-pci.h
> +++ b/arch/powerpc/include/asm/ppc-pci.h
> @@ -40,6 +40,7 @@ void *traverse_pci_dn(struct pci_dn *root,
>   void *(*fn)(struct pci_dn *, void *),
>   void *data);
>  extern void pci_devs_phb_init_dynamic(struct pci_controller *phb);
> +struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus);
>
>  /* From rtas_pci.h */
>  extern void init_pci_config_tokens (void);
> diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
> index ab147a1909c8..341ed71250f1 100644
> --- a/arch/powerpc/kernel/pci_dn.c
> +++ b/arch/powerpc/kernel/pci_dn.c
> @@ -40,7 +40,7 @@
>   * one of PF's bridge. For other devices, their firmware
>   * data is linked to that of their bridge.
>   */
> -static struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus)
> +struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus)
>  {
> struct pci_bus *pbus;
> struct device_node *dn;
> diff --git a/arch/powerpc/platforms/powernv/pci.c 
> b/arch/powerpc/platforms/powernv/pci.c
> index 3260250d2029..73c2d0aed996 100644
> --- a/arch/powerpc/platforms/powernv/pci.c
> +++ b/arch/powerpc/platforms/powernv/pci.c
> @@ -761,6 +761,21 @@ static inline pnv_pci_cfg_check(struct pci_dn *pdn)
>  }
>  #endif /* CONFIG_EEH */
>
> +static int get_bus_pe_number(struct pci_bus *bus)
> +{
> +   struct pci_dn *pdn = pci_bus_to_pdn(bus);
> +   struct pci_dn *child;
> +
> +   if (!pdn)
> +   return IODA_INVALID_PE;
> +
> +   list_for_each_entry(child, &pdn->child_list, list)
> +   if (child->pe_number != IODA_INVALID_PE)
> +   return child->pe_number;
> +
> +   return IODA_INVALID_PE;
> +}
> +
>  static int pnv_pci_read_config(struct pci_bus *bus,
>unsigned int devfn,
>int where, int size, u32 *val)
> @@ -769,12 +784,23 @@ static int pnv_pci_read_config(struct pci_bus *bus,
> struct pci_controller *hose = pci_bus_to_host(bus);
> struct pnv_phb *phb = hose->private_data;
> int ret;
> +   u32 empty_val = 0xffffffff;
>
> -   *val = 0xffffffff;
> +   *val = empty_val;
> pdn = pci_get_pdn_by_devfn(bus, devfn);
> -   if (!pdn)
> -   return pnv_pci_cfg_read_raw(phb->opal_id, bus->number, devfn,
> -   where, size, val);
> +   if (!pdn) {
> +   int pe_number = get_bus_pe_number(bus);
> +
> +   ret = pnv_pci_cfg_read_raw(phb->opal_id, bus->number, devfn,
> +  where, size, val);
> +
> +   if (!ret && (*val == empty_val) && phb->unfreeze_pe)

Does this empty val check work when using 1 or 2 byte cfg accesses?

> +   phb->unfreeze_pe(phb, (pe_number == IODA_INVALID_PE) ?
> +0xff : pe_number,

Use phb->ioda.reserved_pe_idx rather than guessing that 0xff is safe
to use. On P9 we have PHBs with 512 PEs and some older P8 firmware
releases used 0 as the reserved PE rather than 0xff.

> +OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
> +
> +   return ret;
> +   }
>
> if (!pnv_pci_cfg_check(pdn))
> return PCIBIOS_DEVICE_NOT_FOUND;
> --
> 2.20.1
>


[PATCH RFC v4 2/9] powerpc/powernv/pci: Suppress an EEH error when reading an empty slot

2019-03-01 Thread Sergey Miroshnichenko
Reading an empty slot returns all ones, which triggers a false
EEH error event on PowerNV. This patch unfreezes the bus where
it has happened.

Signed-off-by: Sergey Miroshnichenko 
---
 arch/powerpc/include/asm/ppc-pci.h   |  1 +
 arch/powerpc/kernel/pci_dn.c |  2 +-
 arch/powerpc/platforms/powernv/pci.c | 34 
 3 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc-pci.h 
b/arch/powerpc/include/asm/ppc-pci.h
index f67da277d652..737393c54f58 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -40,6 +40,7 @@ void *traverse_pci_dn(struct pci_dn *root,
  void *(*fn)(struct pci_dn *, void *),
  void *data);
 extern void pci_devs_phb_init_dynamic(struct pci_controller *phb);
+struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus);
 
 /* From rtas_pci.h */
 extern void init_pci_config_tokens (void);
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index ab147a1909c8..341ed71250f1 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -40,7 +40,7 @@
  * one of PF's bridge. For other devices, their firmware
  * data is linked to that of their bridge.
  */
-static struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus)
+struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus)
 {
struct pci_bus *pbus;
struct device_node *dn;
diff --git a/arch/powerpc/platforms/powernv/pci.c 
b/arch/powerpc/platforms/powernv/pci.c
index 3260250d2029..73c2d0aed996 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -761,6 +761,21 @@ static inline pnv_pci_cfg_check(struct pci_dn *pdn)
 }
 #endif /* CONFIG_EEH */
 
+static int get_bus_pe_number(struct pci_bus *bus)
+{
+   struct pci_dn *pdn = pci_bus_to_pdn(bus);
+   struct pci_dn *child;
+
+   if (!pdn)
+   return IODA_INVALID_PE;
+
+   list_for_each_entry(child, &pdn->child_list, list)
+   if (child->pe_number != IODA_INVALID_PE)
+   return child->pe_number;
+
+   return IODA_INVALID_PE;
+}
+
 static int pnv_pci_read_config(struct pci_bus *bus,
   unsigned int devfn,
   int where, int size, u32 *val)
@@ -769,12 +784,23 @@ static int pnv_pci_read_config(struct pci_bus *bus,
struct pci_controller *hose = pci_bus_to_host(bus);
struct pnv_phb *phb = hose->private_data;
int ret;
+   u32 empty_val = 0xffffffff;
 
-   *val = 0xffffffff;
+   *val = empty_val;
pdn = pci_get_pdn_by_devfn(bus, devfn);
-   if (!pdn)
-   return pnv_pci_cfg_read_raw(phb->opal_id, bus->number, devfn,
-   where, size, val);
+   if (!pdn) {
+   int pe_number = get_bus_pe_number(bus);
+
+   ret = pnv_pci_cfg_read_raw(phb->opal_id, bus->number, devfn,
+  where, size, val);
+
+   if (!ret && (*val == empty_val) && phb->unfreeze_pe)
+   phb->unfreeze_pe(phb, (pe_number == IODA_INVALID_PE) ?
+0xff : pe_number,
+OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+
+   return ret;
+   }
 
if (!pnv_pci_cfg_check(pdn))
return PCIBIOS_DEVICE_NOT_FOUND;
-- 
2.20.1