Re: [PATCH v2 5/5] PCI/powerpc/eeh: Add pcibios hooks for preparing to rescan

2018-09-14 Thread Sergey Miroshnichenko


On 9/12/18 1:39 PM,  wrote:
> On Mon, 2018-09-10 at 19:00 +0300, Sergey Miroshnichenko wrote:
>>
>> Yes, missing a real EEH event is possible, unfortunately, and it is
>> indeed worth mentioning.
>>
>> To reduce this probability the next patchset I'll post in a few days
>> among other things puts all the affected device drivers to pause during
>> rescan, mainly because of moving BARs and bridge windows, but it will
>> also help here a bit.
> 
> How do you deal with moving BARs etc... within the segmenting
> restrictions of EEH ?
> 

Actually, [un]fortunately, we haven't encountered any segmenting issues
yet, but to move BARs we are using the same existing mechanism in Linux
kernel that re-enumerated the PCIe topology during startup with
"pci=realloc" and PCI_REASSIGN_ALL_BUS. What restrictions must be broken
to provoke a segmenting event?

Are there any other limitations on segmenting besides keeping all the
BARs of the PHB within its huge M32+M64 segments which are 2GiB+4GiB on
our setup?

> It's a horrible mess right now and I don't know if the current code can
> even work properly to be honest.
> 
> Cheers,
> Ben.
> 
> 

Best regards,
Serge


Re: [PATCH v2 5/5] PCI/powerpc/eeh: Add pcibios hooks for preparing to rescan

2018-09-12 Thread Benjamin Herrenschmidt
On Mon, 2018-09-10 at 19:00 +0300, Sergey Miroshnichenko wrote:
> 
> Yes, missing a real EEH event is possible, unfortunately, and it is
> indeed worth mentioning.
> 
> To reduce this probability the next patchset I'll post in a few days
> among other things puts all the affected device drivers to pause during
> rescan, mainly because of moving BARs and bridge windows, but it will
> also help here a bit.

How do you deal with moving BARs etc... within the segmenting
restrictions of EEH ?

It's a horrible mess right now and I don't know if the current code can
even work properly to be honest.

Cheers,
Ben.




Re: [PATCH v2 5/5] PCI/powerpc/eeh: Add pcibios hooks for preparing to rescan

2018-09-10 Thread Sergey Miroshnichenko
Hello Sam,

On 9/10/18 8:03 AM, Sam Bobroff wrote:
> Hi Sergey,
> 
> On Thu, Sep 06, 2018 at 02:57:52PM +0300, Sergey Miroshnichenko wrote:
>> Reading an empty slot returns all ones, which triggers a false
>> EEH error event on PowerNV.
>>
>> New callbacks pcibios_rescan_prepare/done are introduced to
>> pause/resume the EEH during rescan.
> 
> If I understand it correctly, this temporarily disables EEH for config space
> accesses on the whole PHB while the rescan runs. Is it possible that a
> real EEH event could be missed if it occurred during the rescan?
> 
> Even if it's not possible, I think it would be good to mention that in a
> comment.

Yes, missing a real EEH event is possible, unfortunately, and it is
indeed worth mentioning.

To reduce this probability the next patchset I'll post in a few days
among other things puts all the affected device drivers to pause during
rescan, mainly because of moving BARs and bridge windows, but it will
also help here a bit.

> 
>> Signed-off-by: Sergey Miroshnichenko 
>> ---
>>  arch/powerpc/include/asm/eeh.h   |  2 ++
>>  arch/powerpc/kernel/eeh.c| 12 +++
>>  arch/powerpc/platforms/powernv/eeh-powernv.c | 22 
>>  drivers/pci/probe.c  | 14 +
>>  include/linux/pci.h  |  2 ++
>>  5 files changed, 52 insertions(+)
>>
>> diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
>> index 219637ea69a1..926c3e31df99 100644
>> --- a/arch/powerpc/include/asm/eeh.h
>> +++ b/arch/powerpc/include/asm/eeh.h
>> @@ -219,6 +219,8 @@ struct eeh_ops {
>>  int (*next_error)(struct eeh_pe **pe);
>>  int (*restore_config)(struct pci_dn *pdn);
>>  int (*notify_resume)(struct pci_dn *pdn);
>> +int (*pause)(struct pci_bus *bus);
>> +int (*resume)(struct pci_bus *bus);
> 
> I think these names are a bit too generic, what about naming them
> pause_bus()/resume_bus() or even prepare_rescan()/rescan_done()?
> 

Thanks! I will rename them to rescan_prepare/rescan_done to make friends
with reset_prepare/reset_done from struct pci_error_handlers.

>>  };
>>  
>>  extern int eeh_subsystem_flags;
>> diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
>> index 6ebba3e48b01..9fb5012f389d 100644
>> --- a/arch/powerpc/kernel/eeh.c
>> +++ b/arch/powerpc/kernel/eeh.c
>> @@ -1831,3 +1831,15 @@ static int __init eeh_init_proc(void)
>>  return 0;
>>  }
>>  __initcall(eeh_init_proc);
>> +
>> +void pcibios_rescan_prepare(struct pci_bus *bus)
>> +{
>> +if (eeh_ops && eeh_ops->pause)
>> +eeh_ops->pause(bus);
>> +}
>> +
>> +void pcibios_rescan_done(struct pci_bus *bus)
>> +{
>> +if (eeh_ops && eeh_ops->resume)
>> +eeh_ops->resume(bus);
>> +}
>> diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c 
>> b/arch/powerpc/platforms/powernv/eeh-powernv.c
>> index 3c1beae29f2d..9724a58afcd2 100644
>> --- a/arch/powerpc/platforms/powernv/eeh-powernv.c
>> +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
>> @@ -59,6 +59,26 @@ void pnv_pcibios_bus_add_device(struct pci_dev *pdev)
>>  eeh_sysfs_add_device(pdev);
>>  }
>>  
>> +static int pnv_eeh_pause(struct pci_bus *bus)
>> +{
>> +struct pci_controller *hose = pci_bus_to_host(bus);
>> +struct pnv_phb *phb = hose->private_data;
>> +
>> +phb->flags &= ~PNV_PHB_FLAG_EEH;
>> +disable_irq(eeh_event_irq);
>> +return 0;
>> +}
>> +
>> +static int pnv_eeh_resume(struct pci_bus *bus)
>> +{
>> +struct pci_controller *hose = pci_bus_to_host(bus);
>> +struct pnv_phb *phb = hose->private_data;
>> +
>> +enable_irq(eeh_event_irq);
>> +phb->flags |= PNV_PHB_FLAG_EEH;
>> +return 0;
>> +}
>> +
>>  static int pnv_eeh_init(void)
>>  {
>>  struct pci_controller *hose;
>> @@ -1710,6 +1730,8 @@ static struct eeh_ops pnv_eeh_ops = {
>>  .write_config   = pnv_eeh_write_config,
>>  .next_error = pnv_eeh_next_error,
>>  .restore_config = pnv_eeh_restore_config,
>> +.pause  = pnv_eeh_pause,
>> +.resume = pnv_eeh_resume,
>>  .notify_resume  = NULL
>>  };
>>  
>> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
>> index ac876e32de4b..4a9045364809 100644
>> --- a/drivers/pci/probe.c
>> +++ b/drivers/pci/probe.c
>> @@ -2801,6 +2801,14 @@ void __weak pcibios_remove_bus(struct pci_bus *bus)
>>  {
>>  }
>>  
>> +void __weak pcibios_rescan_prepare(struct pci_bus *bus)
>> +{
>> +}
>> +
>> +void __weak pcibios_rescan_done(struct pci_bus *bus)
>> +{
>> +}
>> +
>>  struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
>>  struct pci_ops *ops, void *sysdata, struct list_head *resources)
>>  {
>> @@ -3055,9 +3063,15 @@ unsigned int pci_rescan_bus_bridge_resize(struct 
>> pci_dev *bridge)
>>  unsigned int pci_rescan_bus(struct pci_bus *bus)
>>  {
>>  unsigned int max;
>> +struct pci_bus *root = bus;
>> +
>> +while 

Re: [PATCH v2 5/5] PCI/powerpc/eeh: Add pcibios hooks for preparing to rescan

2018-09-09 Thread Sam Bobroff
Hi Sergey,

On Thu, Sep 06, 2018 at 02:57:52PM +0300, Sergey Miroshnichenko wrote:
> Reading an empty slot returns all ones, which triggers a false
> EEH error event on PowerNV.
> 
> New callbacks pcibios_rescan_prepare/done are introduced to
> pause/resume the EEH during rescan.

If I understand it correctly, this temporarily disables EEH for config space
accesses on the whole PHB while the rescan runs. Is it possible that a
real EEH event could be missed if it occurred during the rescan?

Even if it's not possible, I think it would be good to mention that in a
comment.

> Signed-off-by: Sergey Miroshnichenko 
> ---
>  arch/powerpc/include/asm/eeh.h   |  2 ++
>  arch/powerpc/kernel/eeh.c| 12 +++
>  arch/powerpc/platforms/powernv/eeh-powernv.c | 22 
>  drivers/pci/probe.c  | 14 +
>  include/linux/pci.h  |  2 ++
>  5 files changed, 52 insertions(+)
> 
> diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
> index 219637ea69a1..926c3e31df99 100644
> --- a/arch/powerpc/include/asm/eeh.h
> +++ b/arch/powerpc/include/asm/eeh.h
> @@ -219,6 +219,8 @@ struct eeh_ops {
>   int (*next_error)(struct eeh_pe **pe);
>   int (*restore_config)(struct pci_dn *pdn);
>   int (*notify_resume)(struct pci_dn *pdn);
> + int (*pause)(struct pci_bus *bus);
> + int (*resume)(struct pci_bus *bus);

I think these names are a bit too generic, what about naming them
pause_bus()/resume_bus() or even prepare_rescan()/rescan_done()?

>  };
>  
>  extern int eeh_subsystem_flags;
> diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
> index 6ebba3e48b01..9fb5012f389d 100644
> --- a/arch/powerpc/kernel/eeh.c
> +++ b/arch/powerpc/kernel/eeh.c
> @@ -1831,3 +1831,15 @@ static int __init eeh_init_proc(void)
>   return 0;
>  }
>  __initcall(eeh_init_proc);
> +
> +void pcibios_rescan_prepare(struct pci_bus *bus)
> +{
> + if (eeh_ops && eeh_ops->pause)
> + eeh_ops->pause(bus);
> +}
> +
> +void pcibios_rescan_done(struct pci_bus *bus)
> +{
> + if (eeh_ops && eeh_ops->resume)
> + eeh_ops->resume(bus);
> +}
> diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c 
> b/arch/powerpc/platforms/powernv/eeh-powernv.c
> index 3c1beae29f2d..9724a58afcd2 100644
> --- a/arch/powerpc/platforms/powernv/eeh-powernv.c
> +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
> @@ -59,6 +59,26 @@ void pnv_pcibios_bus_add_device(struct pci_dev *pdev)
>   eeh_sysfs_add_device(pdev);
>  }
>  
> +static int pnv_eeh_pause(struct pci_bus *bus)
> +{
> + struct pci_controller *hose = pci_bus_to_host(bus);
> + struct pnv_phb *phb = hose->private_data;
> +
> + phb->flags &= ~PNV_PHB_FLAG_EEH;
> + disable_irq(eeh_event_irq);
> + return 0;
> +}
> +
> +static int pnv_eeh_resume(struct pci_bus *bus)
> +{
> + struct pci_controller *hose = pci_bus_to_host(bus);
> + struct pnv_phb *phb = hose->private_data;
> +
> + enable_irq(eeh_event_irq);
> + phb->flags |= PNV_PHB_FLAG_EEH;
> + return 0;
> +}
> +
>  static int pnv_eeh_init(void)
>  {
>   struct pci_controller *hose;
> @@ -1710,6 +1730,8 @@ static struct eeh_ops pnv_eeh_ops = {
>   .write_config   = pnv_eeh_write_config,
>   .next_error = pnv_eeh_next_error,
>   .restore_config = pnv_eeh_restore_config,
> + .pause  = pnv_eeh_pause,
> + .resume = pnv_eeh_resume,
>   .notify_resume  = NULL
>  };
>  
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index ac876e32de4b..4a9045364809 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -2801,6 +2801,14 @@ void __weak pcibios_remove_bus(struct pci_bus *bus)
>  {
>  }
>  
> +void __weak pcibios_rescan_prepare(struct pci_bus *bus)
> +{
> +}
> +
> +void __weak pcibios_rescan_done(struct pci_bus *bus)
> +{
> +}
> +
>  struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
>   struct pci_ops *ops, void *sysdata, struct list_head *resources)
>  {
> @@ -3055,9 +3063,15 @@ unsigned int pci_rescan_bus_bridge_resize(struct 
> pci_dev *bridge)
>  unsigned int pci_rescan_bus(struct pci_bus *bus)
>  {
>   unsigned int max;
> + struct pci_bus *root = bus;
> +
> + while (!pci_is_root_bus(root))
> + root = root->parent;
>  
> + pcibios_rescan_prepare(root);
>   max = pci_scan_child_bus(bus);
>   pci_assign_unassigned_bus_resources(bus);
> + pcibios_rescan_done(root);
>   pci_bus_add_devices(bus);
>  
>   return max;
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 340029b2fb38..42930731c5a7 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -1929,6 +1929,8 @@ void pcibios_penalize_isa_irq(int irq, int active);
>  int pcibios_alloc_irq(struct pci_dev *dev);
>  void pcibios_free_irq(struct pci_dev *dev);
>  resource_size_t 

[PATCH v2 5/5] PCI/powerpc/eeh: Add pcibios hooks for preparing to rescan

2018-09-06 Thread Sergey Miroshnichenko
Reading an empty slot returns all ones, which triggers a false
EEH error event on PowerNV.

New callbacks pcibios_rescan_prepare/done are introduced to
pause/resume the EEH during rescan.

Signed-off-by: Sergey Miroshnichenko 
---
 arch/powerpc/include/asm/eeh.h   |  2 ++
 arch/powerpc/kernel/eeh.c| 12 +++
 arch/powerpc/platforms/powernv/eeh-powernv.c | 22 
 drivers/pci/probe.c  | 14 +
 include/linux/pci.h  |  2 ++
 5 files changed, 52 insertions(+)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 219637ea69a1..926c3e31df99 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -219,6 +219,8 @@ struct eeh_ops {
int (*next_error)(struct eeh_pe **pe);
int (*restore_config)(struct pci_dn *pdn);
int (*notify_resume)(struct pci_dn *pdn);
+   int (*pause)(struct pci_bus *bus);
+   int (*resume)(struct pci_bus *bus);
 };
 
 extern int eeh_subsystem_flags;
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 6ebba3e48b01..9fb5012f389d 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1831,3 +1831,15 @@ static int __init eeh_init_proc(void)
return 0;
 }
 __initcall(eeh_init_proc);
+
+void pcibios_rescan_prepare(struct pci_bus *bus)
+{
+   if (eeh_ops && eeh_ops->pause)
+   eeh_ops->pause(bus);
+}
+
+void pcibios_rescan_done(struct pci_bus *bus)
+{
+   if (eeh_ops && eeh_ops->resume)
+   eeh_ops->resume(bus);
+}
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c 
b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 3c1beae29f2d..9724a58afcd2 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -59,6 +59,26 @@ void pnv_pcibios_bus_add_device(struct pci_dev *pdev)
eeh_sysfs_add_device(pdev);
 }
 
+static int pnv_eeh_pause(struct pci_bus *bus)
+{
+   struct pci_controller *hose = pci_bus_to_host(bus);
+   struct pnv_phb *phb = hose->private_data;
+
+   phb->flags &= ~PNV_PHB_FLAG_EEH;
+   disable_irq(eeh_event_irq);
+   return 0;
+}
+
+static int pnv_eeh_resume(struct pci_bus *bus)
+{
+   struct pci_controller *hose = pci_bus_to_host(bus);
+   struct pnv_phb *phb = hose->private_data;
+
+   enable_irq(eeh_event_irq);
+   phb->flags |= PNV_PHB_FLAG_EEH;
+   return 0;
+}
+
 static int pnv_eeh_init(void)
 {
struct pci_controller *hose;
@@ -1710,6 +1730,8 @@ static struct eeh_ops pnv_eeh_ops = {
.write_config   = pnv_eeh_write_config,
.next_error = pnv_eeh_next_error,
.restore_config = pnv_eeh_restore_config,
+   .pause  = pnv_eeh_pause,
+   .resume = pnv_eeh_resume,
.notify_resume  = NULL
 };
 
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index ac876e32de4b..4a9045364809 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2801,6 +2801,14 @@ void __weak pcibios_remove_bus(struct pci_bus *bus)
 {
 }
 
+void __weak pcibios_rescan_prepare(struct pci_bus *bus)
+{
+}
+
+void __weak pcibios_rescan_done(struct pci_bus *bus)
+{
+}
+
 struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
struct pci_ops *ops, void *sysdata, struct list_head *resources)
 {
@@ -3055,9 +3063,15 @@ unsigned int pci_rescan_bus_bridge_resize(struct pci_dev 
*bridge)
 unsigned int pci_rescan_bus(struct pci_bus *bus)
 {
unsigned int max;
+   struct pci_bus *root = bus;
+
+   while (!pci_is_root_bus(root))
+   root = root->parent;
 
+   pcibios_rescan_prepare(root);
max = pci_scan_child_bus(bus);
pci_assign_unassigned_bus_resources(bus);
+   pcibios_rescan_done(root);
pci_bus_add_devices(bus);
 
return max;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 340029b2fb38..42930731c5a7 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1929,6 +1929,8 @@ void pcibios_penalize_isa_irq(int irq, int active);
 int pcibios_alloc_irq(struct pci_dev *dev);
 void pcibios_free_irq(struct pci_dev *dev);
 resource_size_t pcibios_default_alignment(void);
+void pcibios_rescan_prepare(struct pci_bus *bus);
+void pcibios_rescan_done(struct pci_bus *bus);
 
 #ifdef CONFIG_HIBERNATE_CALLBACKS
 extern struct dev_pm_ops pcibios_pm_ops;
-- 
2.17.1