Re: [PATCH v8 3/7] s390/pci: add support for IOMMU default DMA mode build options

2019-06-03 Thread Sebastian Ott


On Thu, 30 May 2019, Zhen Lei wrote:
> The default DMA mode is LAZY on s390; this patch makes it possible to set it to
> STRICT at build time. It can be overridden by a boot option.
> 
> There is no functional change.
> 
> Signed-off-by: Zhen Lei 

Acked-by: Sebastian Ott 



Re: [PATCH v5 4/6] s390/pci: add support for generic boot option iommu.dma_mode

2019-04-10 Thread Sebastian Ott
On Tue, 9 Apr 2019, Zhen Lei wrote:
> s390_iommu=strict is equivalent to iommu.dma_mode=strict.
> 
> Signed-off-by: Zhen Lei 

Acked-by: Sebastian Ott 



Re: [PATCH 1/2] PCI/IOV: provide flag to skip VF scanning

2018-12-21 Thread Sebastian Ott
Hello Bjorn,

On Thu, 20 Dec 2018, Bjorn Helgaas wrote:
> I think the strategy is fine, but can you restructure the patches
> like this:
> 
>   1) Factor out sriov_add_vfs() and sriov_dev_vfs().  This makes no
>  functional change at all.
> 
>   2) Add dev->no_vf_scan, set it in the s390 pcibios_add_device(), and
>  test it in sriov_add_vfs(), and sriov_del_vfs().
> 
> I think both pieces will be easier to review that way.

Done. I took the liberty to add Christoph's R-b to the first two patches
since it's just a split of the patch he gave the R-b to.

Thanks!
Sebastian



[PATCH 3/3] s390/pci: skip VF scanning

2018-12-21 Thread Sebastian Ott
Set the flag to skip scanning for VFs after SRIOV enablement.
VF creation will be triggered by the hotplug code.

Signed-off-by: Sebastian Ott 
Reviewed-by: Christoph Hellwig 
---
 arch/s390/pci/pci.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 9f6f392a4461..4266a4de3160 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -651,6 +651,9 @@ int pcibios_add_device(struct pci_dev *pdev)
struct resource *res;
int i;
 
+   if (pdev->is_physfn)
+   pdev->no_vf_scan = 1;
+
pdev->dev.groups = zpci_attr_groups;
pdev->dev.dma_ops = &s390_pci_dma_ops;
zpci_map_resources(pdev);
-- 
2.13.4



[PATCH 2/3] PCI/IOV: provide flag to skip VF scanning

2018-12-21 Thread Sebastian Ott
Provide a flag to skip scanning for new VFs after SRIOV enablement.
This can be set by implementations for which the VFs are already
reported by other means.

Signed-off-by: Sebastian Ott 
Reviewed-by: Christoph Hellwig 
---
 drivers/pci/iov.c   | 6 ++
 include/linux/pci.h | 1 +
 2 files changed, 7 insertions(+)

diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index 408db232a328..3aa115ed3a65 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -257,6 +257,9 @@ static int sriov_add_vfs(struct pci_dev *dev, u16 num_vfs)
unsigned int i;
int rc;
 
+   if (dev->no_vf_scan)
+   return 0;
+
for (i = 0; i < num_vfs; i++) {
rc = pci_iov_add_virtfn(dev, i);
if (rc)
@@ -385,6 +388,9 @@ static void sriov_del_vfs(struct pci_dev *dev)
struct pci_sriov *iov = dev->sriov;
int i;
 
+   if (dev->no_vf_scan)
+   return;
+
for (i = 0; i < iov->num_VFs; i++)
pci_iov_remove_virtfn(dev, i);
 }
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 11c71c4ecf75..f9bc7651c406 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -405,6 +405,7 @@ struct pci_dev {
unsigned intnon_compliant_bars:1;   /* Broken BARs; ignore them */
unsigned intis_probed:1;/* Device probing in progress */
unsigned intlink_active_reporting:1;/* Device capable of reporting 
link active */
+   unsigned intno_vf_scan:1;   /* Don't scan for VFs after IOV 
enablement */
pci_dev_flags_t dev_flags;
atomic_tenable_cnt; /* pci_enable_device has been called */
 
-- 
2.13.4



[PATCH 1/3] PCI/IOV: factor out sriov_add_vfs

2018-12-21 Thread Sebastian Ott
Provide sriov_add_vfs as a wrapper to scan for VFs that cleans up
after itself. This is just a code simplification. No functional change.

Signed-off-by: Sebastian Ott 
Reviewed-by: Christoph Hellwig 
---
 drivers/pci/iov.c | 44 +++-
 1 file changed, 31 insertions(+), 13 deletions(-)

diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index 9616eca3182f..408db232a328 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -252,6 +252,24 @@ int __weak pcibios_sriov_disable(struct pci_dev *pdev)
return 0;
 }
 
+static int sriov_add_vfs(struct pci_dev *dev, u16 num_vfs)
+{
+   unsigned int i;
+   int rc;
+
+   for (i = 0; i < num_vfs; i++) {
+   rc = pci_iov_add_virtfn(dev, i);
+   if (rc)
+   goto failed;
+   }
+   return 0;
+failed:
+   while (i--)
+   pci_iov_remove_virtfn(dev, i);
+
+   return rc;
+}
+
 static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
 {
int rc;
@@ -337,21 +355,15 @@ static int sriov_enable(struct pci_dev *dev, int 
nr_virtfn)
msleep(100);
pci_cfg_access_unlock(dev);
 
-   for (i = 0; i < initial; i++) {
-   rc = pci_iov_add_virtfn(dev, i);
-   if (rc)
-   goto failed;
-   }
+   rc = sriov_add_vfs(dev, initial);
+   if (rc)
+   goto err_pcibios;
 
kobject_uevent(&dev->dev.kobj, KOBJ_CHANGE);
iov->num_VFs = nr_virtfn;
 
return 0;
 
-failed:
-   while (i--)
-   pci_iov_remove_virtfn(dev, i);
-
 err_pcibios:
iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
pci_cfg_access_lock(dev);
@@ -368,17 +380,23 @@ static int sriov_enable(struct pci_dev *dev, int 
nr_virtfn)
return rc;
 }
 
-static void sriov_disable(struct pci_dev *dev)
+static void sriov_del_vfs(struct pci_dev *dev)
 {
+   struct pci_sriov *iov = dev->sriov;
int i;
+
+   for (i = 0; i < iov->num_VFs; i++)
+   pci_iov_remove_virtfn(dev, i);
+}
+
+static void sriov_disable(struct pci_dev *dev)
+{
struct pci_sriov *iov = dev->sriov;
 
if (!iov->num_VFs)
return;
 
-   for (i = 0; i < iov->num_VFs; i++)
-   pci_iov_remove_virtfn(dev, i);
-
+   sriov_del_vfs(dev);
iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
pci_cfg_access_lock(dev);
pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
-- 
2.13.4



[PATCH 2/2] s390/pci: skip VF scanning

2018-12-18 Thread Sebastian Ott
Set the flag to skip scanning for VFs after SRIOV enablement.
VF creation will be triggered by the hotplug code.

Signed-off-by: Sebastian Ott 
---
 arch/s390/pci/pci.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 9f6f392a4461..4266a4de3160 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -651,6 +651,9 @@ int pcibios_add_device(struct pci_dev *pdev)
struct resource *res;
int i;
 
+   if (pdev->is_physfn)
+   pdev->no_vf_scan = 1;
+
pdev->dev.groups = zpci_attr_groups;
pdev->dev.dma_ops = &s390_pci_dma_ops;
zpci_map_resources(pdev);
-- 
2.13.4



[PATCH 1/2] PCI/IOV: provide flag to skip VF scanning

2018-12-18 Thread Sebastian Ott
Provide a flag to skip scanning for new VFs after SRIOV enablement.
This can be set by implementations for which the VFs are already
reported by other means.

Signed-off-by: Sebastian Ott 
---
 drivers/pci/iov.c   | 48 
 include/linux/pci.h |  1 +
 2 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index 9616eca3182f..3aa115ed3a65 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -252,6 +252,27 @@ int __weak pcibios_sriov_disable(struct pci_dev *pdev)
return 0;
 }
 
+static int sriov_add_vfs(struct pci_dev *dev, u16 num_vfs)
+{
+   unsigned int i;
+   int rc;
+
+   if (dev->no_vf_scan)
+   return 0;
+
+   for (i = 0; i < num_vfs; i++) {
+   rc = pci_iov_add_virtfn(dev, i);
+   if (rc)
+   goto failed;
+   }
+   return 0;
+failed:
+   while (i--)
+   pci_iov_remove_virtfn(dev, i);
+
+   return rc;
+}
+
 static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
 {
int rc;
@@ -337,21 +358,15 @@ static int sriov_enable(struct pci_dev *dev, int 
nr_virtfn)
msleep(100);
pci_cfg_access_unlock(dev);
 
-   for (i = 0; i < initial; i++) {
-   rc = pci_iov_add_virtfn(dev, i);
-   if (rc)
-   goto failed;
-   }
+   rc = sriov_add_vfs(dev, initial);
+   if (rc)
+   goto err_pcibios;
 
kobject_uevent(&dev->dev.kobj, KOBJ_CHANGE);
iov->num_VFs = nr_virtfn;
 
return 0;
 
-failed:
-   while (i--)
-   pci_iov_remove_virtfn(dev, i);
-
 err_pcibios:
iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
pci_cfg_access_lock(dev);
@@ -368,17 +383,26 @@ static int sriov_enable(struct pci_dev *dev, int 
nr_virtfn)
return rc;
 }
 
-static void sriov_disable(struct pci_dev *dev)
+static void sriov_del_vfs(struct pci_dev *dev)
 {
-   int i;
struct pci_sriov *iov = dev->sriov;
+   int i;
 
-   if (!iov->num_VFs)
+   if (dev->no_vf_scan)
return;
 
for (i = 0; i < iov->num_VFs; i++)
pci_iov_remove_virtfn(dev, i);
+}
+
+static void sriov_disable(struct pci_dev *dev)
+{
+   struct pci_sriov *iov = dev->sriov;
+
+   if (!iov->num_VFs)
+   return;
 
+   sriov_del_vfs(dev);
iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
pci_cfg_access_lock(dev);
pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 11c71c4ecf75..f70b9ccd3e86 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -405,6 +405,7 @@ struct pci_dev {
unsigned intnon_compliant_bars:1;   /* Broken BARs; ignore them */
unsigned intis_probed:1;/* Device probing in progress */
unsigned intlink_active_reporting:1;/* Device capable of reporting 
link active */
+   unsigned intno_vf_scan:1;   /* Don't scan for VF's after VF 
enablement */
pci_dev_flags_t dev_flags;
atomic_tenable_cnt; /* pci_enable_device has been called */
 
-- 
2.13.4



Re: [PATCH 2/2] s390/pci: handle function enumeration after sriov enablement

2018-12-17 Thread Sebastian Ott
On Fri, 14 Dec 2018, Christoph Hellwig wrote:
> On Fri, Dec 14, 2018 at 05:12:45AM -0800, Christoph Hellwig wrote:
> > On Thu, Dec 13, 2018 at 06:54:28PM +0100, Sebastian Ott wrote:
> > > Implement pcibios_sriov_{add|del}_vfs as empty functions. VF
> > > creation will be triggered by the hotplug code.
> > 
> > And instead of having the arch supply a no-op arch override I
> > think it would be better to have the config option just stub it
> > out in common code.
> 
> Or in fact maybe even a runtime flag in struct pci_dev.  Who knows
> if all future s390 PCIe busses will have exactly the same behavior
> or if we eventually get the standards compliant behavior back?

Something like this:
https://lore.kernel.org/linux-pci/20181212215453.gj99...@google.com/T/#m649d86ea3c65f669c74d048f89afbaf473876ac3

Not a runtime flag, but a function pointer in struct pci_host_bridge.
This would provide the requested flexibility. The problem with this
approach is that it requires other patches that are not yet upstream
(https://git.kernel.org/pub/scm/linux/kernel/git/arnd/playground.git/log/?h=pci-probe-rework).

Since this discussion has been going on for a few months and I want to
have this code upstream and in distributions for HW enablement I've
asked Bjorn to go with the initial approach (weak functions) and
promised to move that to struct pci_host_bridge once Arnd's patches
are upstream. Would that be OK for you too?

Regards,
Sebastian



[PATCH 2/2] s390/pci: handle function enumeration after sriov enablement

2018-12-13 Thread Sebastian Ott
Implement pcibios_sriov_{add|del}_vfs as empty functions. VF
creation will be triggered by the hotplug code.

Signed-off-by: Sebastian Ott 
---
 arch/s390/pci/pci.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 9f6f392a4461..b5f8db652bf5 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -731,6 +731,17 @@ struct dev_pm_ops pcibios_pm_ops = {
 };
 #endif /* CONFIG_HIBERNATE_CALLBACKS */
 
+#ifdef CONFIG_PCI_IOV
+int pcibios_sriov_add_vfs(struct pci_dev *pdev, u16 num_vfs)
+{
+   return 0;
+}
+
+void pcibios_sriov_del_vfs(struct pci_dev *pdev)
+{
+}
+#endif
+
 static int zpci_alloc_domain(struct zpci_dev *zdev)
 {
if (zpci_unique_uid) {
-- 
2.16.4



[PATCH 1/2] PCI: provide pcibios_sriov_add_vfs

2018-12-13 Thread Sebastian Ott
Move VF detection and device creation code to weak functions
such that architectures can provide a different implementation.

Signed-off-by: Sebastian Ott 
---
 drivers/pci/iov.c   | 43 +++
 include/linux/pci.h |  2 ++
 2 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index 9616eca3182f..1bfdb4deafd7 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -252,6 +252,33 @@ int __weak pcibios_sriov_disable(struct pci_dev *pdev)
return 0;
 }
 
+int __weak pcibios_sriov_add_vfs(struct pci_dev *dev, u16 num_vfs)
+{
+   unsigned int i;
+   int rc;
+
+   for (i = 0; i < num_vfs; i++) {
+   rc = pci_iov_add_virtfn(dev, i);
+   if (rc)
+   goto failed;
+   }
+   return 0;
+failed:
+   while (i--)
+   pci_iov_remove_virtfn(dev, i);
+
+   return rc;
+}
+
+void __weak pcibios_sriov_del_vfs(struct pci_dev *dev)
+{
+   struct pci_sriov *iov = dev->sriov;
+   int i;
+
+   for (i = 0; i < iov->num_VFs; i++)
+   pci_iov_remove_virtfn(dev, i);
+}
+
 static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
 {
int rc;
@@ -337,21 +364,15 @@ static int sriov_enable(struct pci_dev *dev, int 
nr_virtfn)
msleep(100);
pci_cfg_access_unlock(dev);
 
-   for (i = 0; i < initial; i++) {
-   rc = pci_iov_add_virtfn(dev, i);
-   if (rc)
-   goto failed;
-   }
+   rc = pcibios_sriov_add_vfs(dev, initial);
+   if (rc)
+   goto err_pcibios;
 
kobject_uevent(&dev->dev.kobj, KOBJ_CHANGE);
iov->num_VFs = nr_virtfn;
 
return 0;
 
-failed:
-   while (i--)
-   pci_iov_remove_virtfn(dev, i);
-
 err_pcibios:
iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
pci_cfg_access_lock(dev);
@@ -370,14 +391,12 @@ static int sriov_enable(struct pci_dev *dev, int 
nr_virtfn)
 
 static void sriov_disable(struct pci_dev *dev)
 {
-   int i;
struct pci_sriov *iov = dev->sriov;
 
if (!iov->num_VFs)
return;
 
-   for (i = 0; i < iov->num_VFs; i++)
-   pci_iov_remove_virtfn(dev, i);
+   pcibios_sriov_del_vfs(dev);
 
iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
pci_cfg_access_lock(dev);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 11c71c4ecf75..84ca3bcdac76 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -2001,6 +2001,8 @@ void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool 
probe);
 /* Arch may override these (weak) */
 int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs);
 int pcibios_sriov_disable(struct pci_dev *pdev);
+int pcibios_sriov_add_vfs(struct pci_dev *dev, u16 num_vfs);
+void pcibios_sriov_del_vfs(struct pci_dev *dev);
 resource_size_t pcibios_iov_resource_alignment(struct pci_dev *dev, int resno);
 #else
 static inline int pci_iov_virtfn_bus(struct pci_dev *dev, int id)
-- 
2.16.4



Re: [PATCH 0/2] sriov enablement on s390

2018-12-05 Thread Sebastian Ott
Hello Bjorn,

On Wed, 10 Oct 2018, Bjorn Helgaas wrote:
> On Wed, Oct 10, 2018 at 02:55:07PM +0200, Sebastian Ott wrote:
> > On Wed, 12 Sep 2018, Bjorn Helgaas wrote:
> > > On Wed, Sep 12, 2018 at 02:34:09PM +0200, Sebastian Ott wrote:
> > > > On s390 we currently handle SRIOV within firmware. Which means
> > > > that the PF is under firmware control and not visible to operating
> > > > systems. SRIOV enablement happens within firmware and VFs are
> > > > passed through to logical partitions.
> > > > 
> > > > I'm working on a new mode where the PF is under operating system
> > > > control (including SRIOV enablement). However we still need
> > > > firmware support to access the VFs. The way this is supposed
> > > > to work is that when firmware traps the SRIOV enablement it
> > > > will present machine checks to the logical partition that
> > > > triggered the SRIOV enablement and provide the VFs via hotplug
> > > > events.
> > > > 
> > > > The problem I'm faced with is that the VF detection code in
> > > > sriov_enable leads to unusable functions in s390.
> > > 
> > > We're moving away from the weak function implementation style.  Can
> > > you take a look at Arnd's work here, which uses pci_host_bridge
> > > callbacks instead?
> > > 
> > >   https://lkml.kernel.org/r/20180817102645.3839621-1-a...@arndb.de
> > 
> > What's the status of Arnd's patches - will they go upstream in the next
> > couple of versions?
> 
> I hope so [1].  IIRC Arnd mentioned doing some minor updates, so I'm
> waiting on that.
> 
> > What about my patches that I rebased on Arnd's branch
> > will they be considered?
> 
> Definitely.  From my point of view they're just lined up behind Arnd's
> patches.
> 
> [1] 
> https://lore.kernel.org/linux-pci/20181002205903.gd120...@bhelgaas-glaptop.roam.corp.google.com

It appears like these patches are not in-line for the next merge window.
Would it be possible to go with my original patches (using __weak
functions)? (This would also make life easier with regards to backports)
I can post patches to convert this to use function pointers once Arnd's
patches make it to the kernel.

Regards,
Sebastian



Re: [PATCH 0/2] sriov enablement on s390

2018-10-10 Thread Sebastian Ott
Hello Bjorn,

On Wed, 12 Sep 2018, Bjorn Helgaas wrote:
> On Wed, Sep 12, 2018 at 02:34:09PM +0200, Sebastian Ott wrote:
> > On s390 we currently handle SRIOV within firmware. Which means
> > that the PF is under firmware control and not visible to operating
> > systems. SRIOV enablement happens within firmware and VFs are
> > passed through to logical partitions.
> > 
> > I'm working on a new mode where the PF is under operating system
> > control (including SRIOV enablement). However we still need
> > firmware support to access the VFs. The way this is supposed
> > to work is that when firmware traps the SRIOV enablement it
> > will present machine checks to the logical partition that
> > triggered the SRIOV enablement and provide the VFs via hotplug
> > events.
> > 
> > The problem I'm faced with is that the VF detection code in
> > sriov_enable leads to unusable functions in s390.
> 
> We're moving away from the weak function implementation style.  Can
> you take a look at Arnd's work here, which uses pci_host_bridge
> callbacks instead?
> 
>   https://lkml.kernel.org/r/20180817102645.3839621-1-a...@arndb.de

What's the status of Arnd's patches - will they go upstream in the next
couple of versions? What about my patches that I rebased on Arnd's branch
will they be considered?

Regards,
Sebastian



[PATCH 2/2] s390/pci: handle function enumeration after sriov enablement

2018-09-13 Thread Sebastian Ott
Implement add_vfs|del_vfs callbacks as empty functions. VF
creation will be triggered by the hotplug code.

Signed-off-by: Sebastian Ott 
---
 arch/s390/pci/pci.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 9381d5d98156..2ba2cbfaa091 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -785,6 +785,15 @@ static void zpci_remove_bus(struct pci_bus *bus)
kfree(zdev);
 }
 
+static int zpci_add_vfs(struct pci_dev *pdev, u16 num_vfs)
+{
+   return 0;
+}
+
+static void zpci_del_vfs(struct pci_dev *pdev)
+{
+}
+
 static struct pci_bus *pci_scan_root_bus(struct device *parent, int bus,
struct pci_ops *ops, void *sysdata, struct list_head *resources)
 {
@@ -801,6 +810,8 @@ static struct pci_bus *pci_scan_root_bus(struct device 
*parent, int bus,
bridge->busnr = bus;
bridge->ops = ops;
bridge->remove_bus = zpci_remove_bus;
+   bridge->add_vfs = zpci_add_vfs;
+   bridge->del_vfs = zpci_del_vfs;
 
error = pci_scan_root_bus_bridge(bridge);
if (error < 0)
-- 
2.13.4



[PATCH 1/2] pci: provide add_vfs/del_vfs callbacks

2018-09-13 Thread Sebastian Ott
Provide callbacks that can be used by PCI host bridge implementations
to override the behavior of the generic vf detection and device
creation code.

Signed-off-by: Sebastian Ott 
---
 drivers/pci/iov.c   | 51 +++
 include/linux/pci.h |  2 ++
 2 files changed, 41 insertions(+), 12 deletions(-)

diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index 0f04ae648cf1..b2ddfe30c5d8 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -251,6 +251,41 @@ int __weak pcibios_sriov_disable(struct pci_dev *pdev)
return 0;
 }
 
+static int pcibios_sriov_add_vfs(struct pci_dev *dev, u16 num_vfs)
+{
+   struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus);
+   unsigned int i;
+   int rc;
+
+   if (bridge->add_vfs)
+   return bridge->add_vfs(dev, num_vfs);
+
+   for (i = 0; i < num_vfs; i++) {
+   rc = pci_iov_add_virtfn(dev, i);
+   if (rc)
+   goto failed;
+   }
+   return 0;
+failed:
+   while (i--)
+   pci_iov_remove_virtfn(dev, i);
+
+   return rc;
+}
+
+static void pcibios_sriov_del_vfs(struct pci_dev *dev)
+{
+   struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus);
+   struct pci_sriov *iov = dev->sriov;
+   int i;
+
+   if (bridge->del_vfs)
+   return bridge->del_vfs(dev);
+
+   for (i = 0; i < iov->num_VFs; i++)
+   pci_iov_remove_virtfn(dev, i);
+}
+
 static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
 {
int rc;
@@ -336,21 +371,15 @@ static int sriov_enable(struct pci_dev *dev, int 
nr_virtfn)
msleep(100);
pci_cfg_access_unlock(dev);
 
-   for (i = 0; i < initial; i++) {
-   rc = pci_iov_add_virtfn(dev, i);
-   if (rc)
-   goto failed;
-   }
+   rc = pcibios_sriov_add_vfs(dev, initial);
+   if (rc)
+   goto err_pcibios;
 
kobject_uevent(&dev->dev.kobj, KOBJ_CHANGE);
iov->num_VFs = nr_virtfn;
 
return 0;
 
-failed:
-   while (i--)
-   pci_iov_remove_virtfn(dev, i);
-
 err_pcibios:
iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
pci_cfg_access_lock(dev);
@@ -369,14 +398,12 @@ static int sriov_enable(struct pci_dev *dev, int 
nr_virtfn)
 
 static void sriov_disable(struct pci_dev *dev)
 {
-   int i;
struct pci_sriov *iov = dev->sriov;
 
if (!iov->num_VFs)
return;
 
-   for (i = 0; i < iov->num_VFs; i++)
-   pci_iov_remove_virtfn(dev, i);
+   pcibios_sriov_del_vfs(dev);
 
iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
pci_cfg_access_lock(dev);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 680b6bcd0b97..bf99ae98ecb5 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -475,6 +475,8 @@ struct pci_host_bridge {
int (*free_irq)(struct pci_dev *);
void (*add_bus)(struct pci_bus *);
void (*remove_bus)(struct pci_bus *);
+   int (*add_vfs)(struct pci_dev *dev, u16 num_vfs);
+   void (*del_vfs)(struct pci_dev *dev);
void*release_data;
struct msi_controller *msi;
unsigned intignore_reset_delay:1;   /* For entire hierarchy */
-- 
2.13.4



Re: [PATCH 0/2] sriov enablement on s390

2018-09-13 Thread Sebastian Ott
On Wed, 12 Sep 2018, Bjorn Helgaas wrote:
> [+cc Arnd, powerpc folks]
> 
> On Wed, Sep 12, 2018 at 02:34:09PM +0200, Sebastian Ott wrote:
> > Hello Bjorn,
> > 
> > On s390 we currently handle SRIOV within firmware. Which means
> > that the PF is under firmware control and not visible to operating
> > systems. SRIOV enablement happens within firmware and VFs are
> > passed through to logical partitions.
> > 
> > I'm working on a new mode where the PF is under operating system
> > control (including SRIOV enablement). However we still need
> > firmware support to access the VFs. The way this is supposed
> > to work is that when firmware traps the SRIOV enablement it
> > will present machine checks to the logical partition that
> > triggered the SRIOV enablement and provide the VFs via hotplug
> > events.
> > 
> > The problem I'm faced with is that the VF detection code in
> > sriov_enable leads to unusable functions in s390.
> 
> We're moving away from the weak function implementation style.  Can
> you take a look at Arnd's work here, which uses pci_host_bridge
> callbacks instead?
> 
>   https://lkml.kernel.org/r/20180817102645.3839621-1-a...@arndb.de
> 
> I cc'd some powerpc folks because they also have a fair amount of
> arch-specific SR-IOV code that might one day move in this direction.

Rebased to Arnd's pci-probe-rework branch.

Sebastian Ott (2):
  pci: provide add_vfs/del_vfs callbacks
  s390/pci: handle function enumeration after sriov enablement

 arch/s390/pci/pci.c | 11 +++
 drivers/pci/iov.c   | 51 +++
 include/linux/pci.h |  2 ++
 3 files changed, 52 insertions(+), 12 deletions(-)

-- 
2.13.4



Re: [PATCH 8/9] PCI: hotplug: Embed hotplug_slot

2018-09-03 Thread Sebastian Ott
On Sun, 19 Aug 2018, Lukas Wunner wrote:
> When the PCI hotplug core and its first user, cpqphp, were introduced in
> February 2002 with historic commit a8a2069f432c, cpqphp allocated a slot
> struct for its internal use plus a hotplug_slot struct to be registered
> with the hotplug core and linked the two with pointers:
> https://git.kernel.org/tglx/history/c/a8a2069f432c
> 
> Nowadays, the predominant pattern in the tree is to embed ("subclass")
> such structures in one another and cast to the containing struct with
> container_of().  But it wasn't until July 2002 that container_of() was
> introduced with historic commit ec4f214232cf:
> https://git.kernel.org/tglx/history/c/ec4f214232cf
> 
> pnv_php, introduced in 2016, did the right thing and embedded struct
> hotplug_slot in its internal struct pnv_php_slot, but all other drivers
> cargo-culted cpqphp's design and linked separate structs with pointers.
> 
> Embedding structs is preferable to linking them with pointers because
> it requires fewer allocations, thereby reducing overhead and simplifying
> error paths.  Casting an embedded struct to the containing struct
> becomes a cheap subtraction rather than a dereference.  And having fewer
> pointers reduces the risk of them pointing nowhere either accidentally
> or due to an attack.
> 
> Convert all drivers to embed struct hotplug_slot in their internal slot
> struct.  The "private" pointer in struct hotplug_slot thereby becomes
> unused, so drop it.
> 
> Signed-off-by: Lukas Wunner 
> Cc: Rafael J. Wysocki 
> Cc: Len Brown 
> Cc: Scott Murray 
> Cc: Benjamin Herrenschmidt 
> Cc: Paul Mackerras 
> Cc: Michael Ellerman 
> Cc: Gavin Shan 
> Cc: Sebastian Ott 
> Cc: Gerald Schaefer 
> Cc: Corentin Chary 
> Cc: Darren Hart 
> Cc: Andy Shevchenko 

for s390_pci_hpc.c:
Acked-by: Sebastian Ott 



Re: [PATCH 7/9] PCI: hotplug: Drop hotplug_slot_info

2018-09-03 Thread Sebastian Ott
On Sun, 19 Aug 2018, Lukas Wunner wrote:
> Ever since the PCI hotplug core was introduced in 2002, drivers had to
> allocate and register a struct hotplug_slot_info for every slot:
> https://git.kernel.org/tglx/history/c/a8a2069f432c
> 
> Apparently the idea was that drivers furnish the hotplug core with an
> up-to-date card presence status, power status, latch status and
> attention indicator status as well as notify the hotplug core of changes
> thereof.  However only 4 out of 12 hotplug drivers bother to notify the
> hotplug core with pci_hp_change_slot_info() and the hotplug core never
> made any use of the information:  There is just a single macro in
> pci_hotplug_core.c, GET_STATUS(), which uses the hotplug_slot_info if
> the driver lacks the corresponding callback in hotplug_slot_ops.  The
> macro is called when the user reads the attribute via sysfs.
> 
> Now, if the callback isn't defined, the attribute isn't exposed in sysfs
> in the first place (see e.g. has_power_file()).  There are only two
> situations when the hotplug_slot_info would actually be accessed:
> 
> * If the driver defines ->enable_slot or ->disable_slot but not
>   ->get_power_status.
> 
> * If the driver defines ->set_attention_status but not
>   ->get_attention_status.
> 
> There is no driver doing the former and just a single driver doing the
> latter, namely pnv_php.c.  Amend it with a ->get_attention_status
> callback.  With that, the hotplug_slot_info becomes completely unused by
> the PCI hotplug core.  But a few drivers use it internally as a cache:
> 
> cpcihp uses it to cache the latch_status and adapter_status.
> cpqhp uses it to cache the adapter_status.
> pnv_php and rpaphp use it to cache the attention_status.
> shpchp uses it to cache all four values.
> 
> Amend these drivers to cache the information in their private slot
> struct.  shpchp's slot struct already contains members to cache the
> power_status and adapter_status, so additional members are only needed
> for the other two values.  In the case of cpqphp, the cached value is
> only accessed in a single place, so instead of caching it, read the
> current value from the hardware.
> 
> Caution:  acpiphp, cpci, cpqhp, shpchp, asus-wmi and eeepc-laptop
> populate the hotplug_slot_info with initial values on probe.  That code
> is herewith removed.  There is a theoretical chance that the code has
> side effects without which the driver fails to function, e.g. if the
> ACPI method to read the adapter status needs to be executed at least
> once on probe.  That seems unlikely to me, still maintainers should
> review the changes carefully for this possibility.
> 
> Signed-off-by: Lukas Wunner 
> Cc: Rafael J. Wysocki 
> Cc: Len Brown 
> Cc: Scott Murray 
> Cc: Benjamin Herrenschmidt 
> Cc: Paul Mackerras 
> Cc: Michael Ellerman 
> Cc: Gavin Shan 
> Cc: Sebastian Ott 
> Cc: Gerald Schaefer 
> Cc: Corentin Chary 
> Cc: Darren Hart 
> Cc: Andy Shevchenko 

for s390_pci_hpc.c:
Acked-by: Sebastian Ott 



Re: [BUG] random kernel crashes after THP rework on s390 (maybe also on PowerPC and ARM)

2016-02-24 Thread Sebastian Ott
On Wed, 24 Feb 2016, Martin Schwidefsky wrote:
> On Tue, 23 Feb 2016 22:33:45 +0300
> "Kirill A. Shutemov"  wrote:
> 
> > On Tue, Feb 23, 2016 at 07:19:07PM +0100, Gerald Schaefer wrote:
> > > I'll check with Martin, maybe it is actually trivial, then we can
> > > do a quick test to rule that one out.
> > 
> > Oh. I found a bug in __split_huge_pmd_locked(). Although, not sure if it's
> > _the_ bug.
> > 
> > pmdp_invalidate() is called for the wrong address :-/
> > I guess that can be destructive on the architecture, right?
> > 
> > Could you check this?
> > 
> > diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> > index 1c317b85ea7d..4246bc70e55a 100644
> > --- a/mm/huge_memory.c
> > +++ b/mm/huge_memory.c
> > @@ -2865,7 +2865,7 @@ static void __split_huge_pmd_locked(struct 
> > vm_area_struct *vma, pmd_t *pmd,
> > pgtable = pgtable_trans_huge_withdraw(mm, pmd);
> > pmd_populate(mm, &_pmd, pgtable);
> > 
> > -   for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
> > +   for (i = 0; i < HPAGE_PMD_NR; i++) {
> > pte_t entry, *pte;
> > /*
> >  * Note that NUMA hinting access restrictions are not
> > @@ -2886,9 +2886,9 @@ static void __split_huge_pmd_locked(struct 
> > vm_area_struct *vma, pmd_t *pmd,
> > }
> > if (dirty)
> > SetPageDirty(page + i);
> > -   pte = pte_offset_map(&_pmd, haddr);
> > +   pte = pte_offset_map(&_pmd, haddr + i * PAGE_SIZE);
> > BUG_ON(!pte_none(*pte));
> > -   set_pte_at(mm, haddr, pte, entry);
> > +   set_pte_at(mm, haddr + i * PAGE_SIZE, pte, entry);
> > atomic_inc(&page[i]._mapcount);
> > pte_unmap(pte);
> > }
> > @@ -2938,7 +2938,7 @@ static void __split_huge_pmd_locked(struct 
> > vm_area_struct *vma, pmd_t *pmd,
> > pmd_populate(mm, pmd, pgtable);
> > 
> > if (freeze) {
> > -   for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
> > +   for (i = 0; i < HPAGE_PMD_NR; i++) {
> > page_remove_rmap(page + i, false);
> > put_page(page + i);
> > }
> 
> Test is running and it looks good so far. For the final assessment I defer
> to Gerald and Sebastian.
> 

Yes, that one worked. My test system has been running make -j10 && make clean
in a loop for 4 hours now. Thanks!

Sebastian

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [BUG] random kernel crashes after THP rework on s390 (maybe also on PowerPC and ARM)

2016-02-19 Thread Sebastian Ott

On Thu, 18 Feb 2016, Kirill A. Shutemov wrote:
> It's worth minimizing the kernel config on which you can see the bug. Things like
> CONFIG_DEBUG_PAGEALLOC used to interfere with THP before.

I disabled all debugging options (using
arch/s390/configs/performance_defconfig) - we still crashed.

Sebastian

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [BUG] random kernel crashes after THP rework on s390 (maybe also on PowerPC and ARM)

2016-02-17 Thread Sebastian Ott
Hi,

On Wed, 17 Feb 2016, Kirill A. Shutemov wrote:
> On Tue, Feb 16, 2016 at 05:24:44PM +0100, Gerald Schaefer wrote:
> > On Mon, 15 Feb 2016 23:35:26 +0200
> > "Kirill A. Shutemov"  wrote:
> > 
> > > Is there any chance that I'll be able to trigger the bug using QEMU?
> > > Does anybody have a QEMU image I can use?
> > > 
> > 
> > I have no image, but trying to reproduce this under virtualization may
> > help to trigger this also on other architectures. After ruling out IPI
> > vs. fast_gup I do not really see why this should be arch-specific, and
> > it wouldn't be the first time that we hit subtle races first on s390, due
> > to our virtualized environment (my test case is make -j20 with 10 CPUs and
> > 4GB of memory, no swap).
> 
> Could you post your kernel config?

Attached.

> It would be nice also to check if disabling split_huge_page() would make
> any difference:
> 
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index a75081ca31cf..26d2b7b21021 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -3364,6 +3364,8 @@ int split_huge_page_to_list(struct page *page, struct 
> list_head *list)
>   bool mlocked;
>   unsigned long flags;
> 
> + return -EBUSY;
> +
>   VM_BUG_ON_PAGE(is_huge_zero_page(page), page);
>   VM_BUG_ON_PAGE(!PageAnon(page), page);
>   VM_BUG_ON_PAGE(!PageLocked(page), page);
> -- 

65c23c6 + this patch also oopsed:

¢ 1707.903808! ODEBUG: active_state not available (active state 0) object type:
rcu_head hint:   (null)
¢ 1707.903852! ¢ cut here !
¢ 1707.903854! WARNING: at lib/debugobjects.c:263
¢ 1707.903856! Modules linked in: bridge stp llc btrfs mlx4_ib mlx4_en ib_sa vxl
an ib_mad ip6_udp_tunnel ib_core udp_tunnel ptp pps_core ib_addr xor raid6_pq gh
ash_s390 mlx4_core prng ecb aes_s390 des_s390 des_generic sha512_s390 dm_mod sha
256_s390 genwqe_card sha1_s390 sha_common crc_itu_t scm_block eadm_sch vhost_net
tun vhost macvtap macvlan kvm autofs4
¢ 1707.903892! CPU: 4 PID: 25215 Comm: git Not tainted 4.5.0-rc4-00037-g65c23c6-
dirty #273
¢ 1707.903894! task: 06a6 ti: 63b04000 task.ti: 63b0
4000
¢ 1707.903896! Krnl PSW : 0404c0018000 00486ce0 (debug_print_object+
 0xb0/0xd0)
¢ 1707.903905!R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 EA:
3
Krnl GPRS: 01a361c7 06a6 0060 0101
¢ 1707.903908!00486cdc  0088cbdc 000
001b53848
¢ 1707.903910!0701  01b53850 000
0008bb820
¢ 1707.903912!00a8d710 dcdd3d38 00486cdc 000
0dcdd3c38
¢ 1707.903920! Krnl Code: 00486cd0: c0200021a496larl%%r2,8bb
5fc
00486cd6: c0e5ffee03a1   brasl   %%r14,247418
#00486cdc: a7f40001   brc 15,486cde
>00486ce0: c41d002f488e   lrl %%r1,a6fdfc
00486ce6: e340f0e80004   lg  %%r4,232(%%r15)
00486cec: a71a0001   ahi %%r1,1
00486cf0: eb6ff0a80004   lmg %%r6,%%r15,168(%%r15)
00486cf6: c41f002f4883   strl%%r1,a6fdfc
[ 1707.903960] Call Trace:
[ 1707.903962] ([<00486cdc>] debug_print_object+0xac/0xd0)
[ 1707.903964]  [<00488094>] debug_object_active_state+0x164/0x178
[ 1707.903969]  [<001b991c>] rcu_process_callbacks+0x564/0x9e8
[ 1707.903973]  [<0013d3ee>] __do_softirq+0x256/0x568
[ 1707.903975]  [<0013da3a>] irq_exit+0x7a/0xd8
[ 1707.903979]  [<0010c87e>] do_IRQ+0x86/0xc0
[ 1707.903984]  [<006fa3f2>] ext_int_handler+0x11e/0x124
[ 1707.903987]  [<00199bfe>] lock_release+0x5ce/0x670
[ 1707.903989] ([<00199be0>] lock_release+0x5b0/0x670)
[ 1707.903993]  [<002dffa2>] getname_flags+0x82/0x218
[ 1707.903994]  [<002e04e8>] user_path_at_empty+0x40/0x68
[ 1707.903998]  [<002d44a4>] vfs_fstatat+0x6c/0xc8
[ 1707.903999]  [<002d4894>] SyS_newlstat+0x2c/0x48
[ 1707.904002]  [<006f9cce>] system_call+0xd6/0x258
[ 1707.904003]  [<03ffb45f1124>] 0x3ffb45f1124
[ 1707.904005] 1 lock held by git/25215:
[ 1707.904006]  #0:  (&obj_hash[i].lock){-.-.-.}, at: [<00487fdc>] debug_object_active_state+0xac/0x178
[ 1707.904012] Last Breaking-Event-Address:
[ 1707.904014]  [<00486cdc>] debug_print_object+0xac/0xd0
[ 1707.904016] ---[ end trace 8ce68dc422e8321c ]---
[ 1707.904018] ODEBUG: deactivate not available (active state 0) object type: rcu_head hint:   (null)
[ 1707.904026] [ cut here ]
[ 1707.904027] WARNING: at lib/debugobjects.c:263
[ 1707.904028] Modules linked in: bridge stp llc btrfs mlx4_ib mlx4_en ib_sa vxlan ib_mad ip6_udp_tunnel ib_core udp_tunnel ptp pps_core ib_addr xor raid6_pq ghash_s390 mlx4_core prng ecb aes_s390 des_s390 des_generic sha512_s390 

Re: [BUG] random kernel crashes after THP rework on s390 (maybe also on PowerPC and ARM)

2016-02-16 Thread Sebastian Ott

On Mon, 15 Feb 2016, Kirill A. Shutemov wrote:
> Just to make sure: commit 122afea9626a is fine, commit 61f5d698cc97
> crashes. Correct?

Correct.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [BUG] random kernel crashes after THP rework on s390 (maybe also on PowerPC and ARM)

2016-02-15 Thread Sebastian Ott
On Mon, 15 Feb 2016, Kirill A. Shutemov wrote:
> > [   59.851421] list_del corruption. next->prev should be 6e1eb000, 
> > but was 0400
> 
> This kinda interesting: 0x400 is TAIL_MAPPING.. Hm..
> 
> Could you check if you see the problem on commit 1c290f642101 and its
> immediate parent?

Both 1c290f642101 and 1c290f642101^ survived 20 compile runs each.

Sebastian

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [BUG] random kernel crashes after THP rework on s390 (maybe also on PowerPC and ARM)

2016-02-13 Thread Sebastian Ott

On Sat, 13 Feb 2016, Kirill A. Shutemov wrote:
> Could you check if revert of fecffad25458 helps?

I reverted fecffad25458 on top of 721675fcf277cf - it oopsed with:

[ 1851.721062] Unable to handle kernel pointer dereference in virtual kernel address space
[ 1851.721075] failing address:  TEID: 0483
[ 1851.721078] Fault in home space mode while using kernel ASCE.
[ 1851.721085] AS:00d5c007 R3:0007 S:a800 P:003d
[ 1851.721128] Oops: 0004 ilc:3 [#1] PREEMPT SMP DEBUG_PAGEALLOC
[ 1851.721135] Modules linked in: bridge stp llc btrfs mlx4_ib mlx4_en ib_sa ib_mad vxlan xor ip6_udp_tunnel ib_core udp_tunnel ptp pps_core ib_addr ghash_s390 raid6_pq prng ecb aes_s390 mlx4_core des_s390 des_generic genwqe_card sha512_s390 sha256_s390 sha1_s390 sha_common crc_itu_t dm_mod scm_block vhost_net tun vhost eadm_sch macvtap macvlan kvm autofs4
¢ 1851.721183! CPU: 7 PID: 256422 Comm: bash Not tainted 
4.5.0-rc3-00058-g07923d7-dirty #178
¢ 1851.721186! task: 7fbfd290 ti: 8c604000 task.ti: 
8c604000
¢ 1851.721189! Krnl PSW : 0704d0018000 0045d3b8 
(__rb_erase_color+0x280/0x308)
¢ 1851.721200!R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:1 PM:0 
EA:3
   Krnl GPRS: 0001 0020  
bd07eff1
¢ 1851.721205!0027ca10  83e45898 
77b61198
¢ 1851.721207!7ce1a490 bd07eff0 7ce1a548 
0027ca10
¢ 1851.721210!bd07c350 bd07eff0 8c607aa8 
8c607a68
¢ 1851.721221! Krnl Code: 0045d3aa: e3c0d0080024   stg 
%%r12,8(%%r13)
  0045d3b0: b9040039   lgr %%r3,%%r9
 #0045d3b4: a53b0001   oill%%r3,1
 >0045d3b8: e3301024   stg 
%%r3,0(%%r1)
  0045d3be: ec28000e007c   cgij
%%r2,0,8,45d3da
  0045d3c4: e3402004   lg  
%%r4,0(%%r2)
  0045d3ca: b904001c   lgr 
%%r1,%%r12
  0045d3ce: ec143f3f0056   rosbg   
%%r1,%%r4,63,63,0
[ 1851.721269] Call Trace:
[ 1851.721273] ([<83e45898>] 0x83e45898)
[ 1851.721279]  [<0029342a>] unlink_anon_vmas+0x9a/0x1d8
[ 1851.721282]  [<00283f34>] free_pgtables+0xcc/0x148
[ 1851.721285]  [<0028c376>] exit_mmap+0xd6/0x300
[ 1851.721289]  [<00134db8>] mmput+0x90/0x118
[ 1851.721294]  [<002d76bc>] flush_old_exec+0x5d4/0x700
[ 1851.721298]  [<003369f4>] load_elf_binary+0x2f4/0x13e8
[ 1851.721301]  [<002d6e4a>] search_binary_handler+0x9a/0x1f8
[ 1851.721304]  [<002d8970>] do_execveat_common.isra.32+0x668/0x9a0
[ 1851.721307]  [<002d8cec>] do_execve+0x44/0x58
[ 1851.721310]  [<002d8f92>] SyS_execve+0x3a/0x48
[ 1851.721315]  [<006fb096>] system_call+0xd6/0x258
[ 1851.721317]  [<03ff997436d6>] 0x3ff997436d6
[ 1851.721319] INFO: lockdep is turned off.
[ 1851.721321] Last Breaking-Event-Address:
[ 1851.721323]  [<0045d31a>] __rb_erase_color+0x1e2/0x308
[ 1851.721327]
[ 1851.721329] ---[ end trace 0d80041ac00cfae2 ]---


> 
> And could you share how crashes looks like? I haven't seen backtraces yet.
> 

Sure. I didn't because they really looked random to me. Most of the time
in rcu or list debugging but I thought these have just been the messenger
observing a corruption first. Anyhow, here is an older one that might look
interesting:

[   59.851421] list_del corruption. next->prev should be 6e1eb000, but 
was 0400
[   59.851469] [ cut here ]
[   59.851472] WARNING: at lib/list_debug.c:71
[   59.851475] Modules linked in: bridge stp llc btrfs xor mlx4_en vxlan 
ip6_udp_tunnel udp_tunnel mlx4_ib ptp pps_core ib_sa ib_mad ib_core ib_addr 
ghash_s390 prng raid6_pq ecb aes_s390 des_s390 des_generic sha512_s390 
sha256_s390 sha1_s390 mlx4_core sha_common genwqe_card scm_block crc_itu_t 
vhost_net tun vhost dm_mod macvtap eadm_sch macvlan kvm autofs4
[   59.851532] CPU: 0 PID: 5400 Comm: git Not tainted 
4.4.0-07794-ga4eff16-dirty #77
[   59.851535] task: d231 ti: d661 task.ti: 
d661
[   59.851539] Krnl PSW : 0704c0018000 00487434 
(__list_del_entry+0xa4/0xe0)
[   59.851548]R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 
EA:3
   Krnl GPRS: 01a7a1cf d231 0054 
0001
[   59.851554]00487430   
774e6900
[   59.851557]03ff5300 6d4017a0 03ff52f0 
03ff52f0
[   59.851560]03d10178 6e1eb000 00487430 
d6613b00
[   59.851571] Krnl 

Re: [BUG] random kernel crashes after THP rework on s390 (maybe also on PowerPC and ARM)

2016-02-12 Thread Sebastian Ott
On Fri, 12 Feb 2016, Will Deacon wrote:
> On Thu, Feb 11, 2016 at 08:57:02PM +0100, Gerald Schaefer wrote:
> > On Thu, 11 Feb 2016 21:09:42 +0200
> > "Kirill A. Shutemov" <kir...@shutemov.name> wrote:
> > > On Thu, Feb 11, 2016 at 07:22:23PM +0100, Gerald Schaefer wrote:
> > > > Sebastian Ott reported random kernel crashes beginning with v4.5-rc1 and
> > > > he also bisected this to commit 61f5d698 "mm: re-enable THP". Further
> > > > review of the THP rework patches, which cannot be bisected, revealed
> > > > commit fecffad "s390, thp: remove infrastructure for handling splitting 
> > > > PMDs"
> > > > (and also similar commits for other archs).
> > > > 
> > > > This commit removes the THP splitting bit and also the architecture
> > > > implementation of pmdp_splitting_flush(), which took care of the IPI for
> > > > fast_gup serialization. The commit message says
> > > > 
> > > > pmdp_splitting_flush() is not needed too: on splitting PMD we will 
> > > > do
> > > > pmdp_clear_flush() + set_pte_at().  pmdp_clear_flush() will do IPI 
> > > > as
> > > > needed for fast_gup
> > > > 
> > > > The assumption that a TLB flush will also produce an IPI is wrong on 
> > > > s390,
> > > > and maybe also on other architectures, and I thought that this was 
> > > > actually
> > > > the main reason for having an arch-specific pmdp_splitting_flush().
> > > > 
> > > > At least PowerPC and ARM also had an individual implementation of
> > > > pmdp_splitting_flush() that used kick_all_cpus_sync() instead of a TLB
> > > > flush to send the IPI, and those were also removed. Putting the arch
> > > > maintainers and mailing lists on cc to verify.
> > > > 
> > > > On s390 this will break the IPI serialization against fast_gup, which
> > > > would certainly explain the random kernel crashes, please revert or fix
> > > > the pmdp_splitting_flush() removal.
> > > 
> > > Sorry for that.
> > > 
> > > I believe, the problem was already addressed for PowerPC:
> > > 
> > > http://lkml.kernel.org/g/454980831-16631-1-git-send-email-aneesh.ku...@linux.vnet.ibm.com
> > > 
> > > I think kick_all_cpus_sync() in arch-specific pmdp_invalidate() would do
> > > the trick, right?
> > 
> > Hmm, not sure about that. After pmdp_invalidate(), a pmd_none() check in
> > fast_gup will still return false, because the pmd is not empty (at least
> > on s390). So I don't see spontaneously how it will help fast_gup to break
> > out to the slow path in case of THP splitting.
> > 
> > > 
> > > If yes, I'll prepare patch tomorrow (some sleep required).
> > > 
> > 
> > We'll check if adding kick_all_cpus_sync() to pmdp_invalidate() helps.
> > It would also be good if Martin has a look at this, he'll return on
> > Monday.
> 
> Do you have a reliable way to trigger the "random kernel crashes"? We've not
> seen anything reported on arm64, but I don't see why we wouldn't be affected
> by the same bug and it would be good to confirm and validate a fix.

My testcase was compiling the kernel. Most of the time my test system
didn't survive a single compile run. During bisect I did at least 20
compile runs to flag a commit as good.

Sebastian

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [BUG] random kernel crashes after THP rework on s390 (maybe also on PowerPC and ARM)

2016-02-12 Thread Sebastian Ott
On Thu, 11 Feb 2016, Kirill A. Shutemov wrote:
> On Thu, Feb 11, 2016 at 09:09:42PM +0200, Kirill A. Shutemov wrote:
> > On Thu, Feb 11, 2016 at 07:22:23PM +0100, Gerald Schaefer wrote:
> > > Hi,
> > > 
> > > Sebastian Ott reported random kernel crashes beginning with v4.5-rc1 and
> > > he also bisected this to commit 61f5d698 "mm: re-enable THP". Further
> > > review of the THP rework patches, which cannot be bisected, revealed
> > > commit fecffad "s390, thp: remove infrastructure for handling splitting 
> > > PMDs"
> > > (and also similar commits for other archs).
> > > 
> > > This commit removes the THP splitting bit and also the architecture
> > > implementation of pmdp_splitting_flush(), which took care of the IPI for
> > > fast_gup serialization. The commit message says
> > > 
> > > pmdp_splitting_flush() is not needed too: on splitting PMD we will do
> > > pmdp_clear_flush() + set_pte_at().  pmdp_clear_flush() will do IPI as
> > > needed for fast_gup
> > > 
> > > The assumption that a TLB flush will also produce an IPI is wrong on s390,
> > > and maybe also on other architectures, and I thought that this was 
> > > actually
> > > the main reason for having an arch-specific pmdp_splitting_flush().
> > > 
> > > At least PowerPC and ARM also had an individual implementation of
> > > pmdp_splitting_flush() that used kick_all_cpus_sync() instead of a TLB
> > > flush to send the IPI, and those were also removed. Putting the arch
> > > maintainers and mailing lists on cc to verify.
> > > 
> > > On s390 this will break the IPI serialization against fast_gup, which
> > > would certainly explain the random kernel crashes, please revert or fix
> > > the pmdp_splitting_flush() removal.
> > 
> > Sorry for that.
> > 
> > I believe, the problem was already addressed for PowerPC:
> > 
> > http://lkml.kernel.org/g/454980831-16631-1-git-send-email-aneesh.ku...@linux.vnet.ibm.com
> 
> Correct link is
> 
> http://lkml.kernel.org/g/1454980831-16631-1-git-send-email-aneesh.ku...@linux.vnet.ibm.com
> 

Based on your suggestion Gerald provided the following patch but sadly it
didn't fix the problem.

Sebastian


---
 arch/s390/include/asm/pgtable.h |2 ++
 1 file changed, 2 insertions(+)

--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1587,6 +1587,8 @@ static inline void pmdp_invalidate(struc
   unsigned long address, pmd_t *pmdp)
 {
pmdp_flush_direct(vma->vm_mm, address, pmdp);
+   /* Serialize against fast_gup with IPI */
+   kick_all_cpus_sync();
 }

 #define __HAVE_ARCH_PMDP_SET_WRPROTECT

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 16/31] s390: handle page-less SG entries

2015-08-12 Thread Sebastian Ott
On Wed, 12 Aug 2015, Christoph Hellwig wrote:

 Use sg_phys() instead of page_to_phys(sg_page(sg)) so that we don't
 require a page structure for all DMA memory.
 
 Signed-off-by: Christoph Hellwig h...@lst.de

Acked-by: Sebastian Ott seb...@linux.vnet.ibm.com


 ---
  arch/s390/pci/pci_dma.c | 20 ++--
  1 file changed, 14 insertions(+), 6 deletions(-)
 
 diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
 index 6fd8d58..aae5a47 100644
 --- a/arch/s390/pci/pci_dma.c
 +++ b/arch/s390/pci/pci_dma.c
 @@ -272,14 +272,13 @@ int dma_set_mask(struct device *dev, u64 mask)
  }
  EXPORT_SYMBOL_GPL(dma_set_mask);
  
 -static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
 -  unsigned long offset, size_t size,
 +static dma_addr_t s390_dma_map_phys(struct device *dev, unsigned long pa,
 +  size_t size,
enum dma_data_direction direction,
struct dma_attrs *attrs)
  {
   struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
   unsigned long nr_pages, iommu_page_index;
 - unsigned long pa = page_to_phys(page) + offset;
   int flags = ZPCI_PTE_VALID;
   dma_addr_t dma_addr;
  
 @@ -301,7 +300,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, 
 struct page *page,
  
   if (!dma_update_trans(zdev, pa, dma_addr, size, flags)) {
   atomic64_add(nr_pages, zdev-mapped_pages);
 - return dma_addr + (offset & ~PAGE_MASK);
 + return dma_addr + (pa & ~PAGE_MASK);
   }
  
  out_free:
 @@ -312,6 +311,16 @@ out_err:
   return DMA_ERROR_CODE;
  }
  
 +static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
 +  unsigned long offset, size_t size,
 +  enum dma_data_direction direction,
 +  struct dma_attrs *attrs)
 +{
 + unsigned long pa = page_to_phys(page) + offset;
 +
 + return s390_dma_map_phys(dev, pa, size, direction, attrs);
 +}
 +
  static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
size_t size, enum dma_data_direction direction,
struct dma_attrs *attrs)
 @@ -384,8 +393,7 @@ static int s390_dma_map_sg(struct device *dev, struct 
 scatterlist *sg,
   int i;
  
   for_each_sg(sg, s, nr_elements, i) {
 - struct page *page = sg_page(s);
 - s-dma_address = s390_dma_map_pages(dev, page, s-offset,
 + s-dma_address = s390_dma_map_phys(dev, sg_phys(s),
   s-length, dir, NULL);
   if (!dma_mapping_error(dev, s-dma_address)) {
   s-dma_length = s-length;
 -- 
 1.9.1
 
 --
 To unsubscribe from this list: send the line unsubscribe linux-kernel in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 Please read the FAQ at  http://www.tux.org/lkml/
 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v3 22/27] s390/MSI: Use MSI chip framework to configure MSI/MSI-X irq

2014-10-16 Thread Sebastian Ott
On Wed, 15 Oct 2014, Yijing Wang wrote:
 Use MSI chip framework instead of arch MSI functions to configure
 MSI/MSI-X irq. So we can manage MSI/MSI-X irq in a unified framework.
 
 Signed-off-by: Yijing Wang wangyij...@huawei.com
 ---
 Hi Sebastian,
I dropped the Acked-by , because this version has a
 lot changes compared to last. So, I guess you may want to check it again.

I did and I agree with that one too.

Regards,
Sebastian

 ---
  arch/s390/include/asm/pci.h |9 +
  arch/s390/pci/pci.c |   12 ++--
  2 files changed, 19 insertions(+), 2 deletions(-)
 
 diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
 index c030900..4d41f08 100644
 --- a/arch/s390/include/asm/pci.h
 +++ b/arch/s390/include/asm/pci.h
 @@ -88,6 +88,8 @@ struct zpci_dev {
   u32 uid;/* user defined id */
   u8 util_str[CLP_UTIL_STR_LEN];  /* utility string */
 
 + struct msi_chip *msi_chip;
 +
   /* IRQ stuff */
   u64 msi_addr;   /* MSI address */
   struct airq_iv *aibv;   /* adapter interrupt bit vector */
 @@ -121,6 +123,13 @@ struct zpci_dev {
   struct dentry   *debugfs_perf;
  };
 
 +static inline struct msi_chip *pci_msi_chip(struct pci_bus *bus)
 +{
 + struct zpci_dev *zpci = bus-sysdata;
 +
 + return zpci-msi_chip;
 +}
 +
  static inline bool zdev_enabled(struct zpci_dev *zdev)
  {
   return (zdev->fh & (1UL << 31)) ? true : false;
 diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
 index 552b990..bf6732f 100644
 --- a/arch/s390/pci/pci.c
 +++ b/arch/s390/pci/pci.c
 @@ -358,7 +358,8 @@ static void zpci_irq_handler(struct airq_struct *airq)
   }
  }
 
 -int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 +static int zpci_setup_msi_irqs(struct msi_chip *chip,
 + struct pci_dev *pdev, int nvec, int type)
  {
   struct zpci_dev *zdev = get_zdev(pdev);
   unsigned int hwirq, msi_vecs;
 @@ -434,7 +435,8 @@ out:
   return rc;
  }
 
 -void arch_teardown_msi_irqs(struct pci_dev *pdev)
 +static void zpci_teardown_msi_irqs(struct msi_chip *chip,
 + struct pci_dev *pdev)
  {
   struct zpci_dev *zdev = get_zdev(pdev);
   struct msi_desc *msi;
 @@ -464,6 +466,11 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev)
   airq_iv_free_bit(zpci_aisb_iv, zdev-aisb);
  }
 
 +static struct msi_chip zpci_msi_chip = {
 + .setup_irqs = zpci_setup_msi_irqs,
 + .teardown_irqs = zpci_teardown_msi_irqs,
 +};
 +
  static void zpci_map_resources(struct zpci_dev *zdev)
  {
   struct pci_dev *pdev = zdev-pdev;
 @@ -749,6 +756,7 @@ static int zpci_scan_bus(struct zpci_dev *zdev)
   if (ret)
   return ret;
 
 + zdev-msi_chip = zpci_msi_chip;
   zdev-bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, pci_root_ops,
 zdev, resources);
   if (!zdev-bus) {
 -- 
 1.7.1
 
 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v1 16/21] s390/MSI: Use MSI chip framework to configure MSI/MSI-X irq

2014-09-16 Thread Sebastian Ott
Hello,

On Fri, 5 Sep 2014, Yijing Wang wrote:
 Use MSI chip framework instead of arch MSI functions to configure
 MSI/MSI-X irq. So we can manage MSI/MSI-X irq in a unified framework.
 
 Signed-off-by: Yijing Wang wangyij...@huawei.com
 ---
  arch/s390/pci/pci.c |   18 ++
  1 files changed, 14 insertions(+), 4 deletions(-)
 
 diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
 index 2fa7b14..da5316e 100644
 --- a/arch/s390/pci/pci.c
 +++ b/arch/s390/pci/pci.c
 @@ -358,7 +358,7 @@ static void zpci_irq_handler(struct airq_struct *airq)
   }
  }
 
 -int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 +int zpci_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
  {
   struct zpci_dev *zdev = get_zdev(pdev);
   unsigned int hwirq, msi_vecs;
 @@ -434,7 +434,7 @@ out:
   return rc;
  }
 
 -void arch_teardown_msi_irqs(struct pci_dev *pdev)
 +static void zpci_teardown_msi_irqs(struct pci_dev *pdev)
  {
   struct zpci_dev *zdev = get_zdev(pdev);
   struct msi_desc *msi;
 @@ -448,9 +448,9 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev)
   /* Release MSI interrupts */
   list_for_each_entry(msi, pdev-msi_list, list) {
   if (msi-msi_attrib.is_msix)
 - default_msix_mask_irq(msi, 1);
 + __msix_mask_irq(msi, 1);
   else
 - default_msi_mask_irq(msi, 1, 1);
 + __msi_mask_irq(msi, 1, 1);

The default_msi_mask_irq to __msi_mask_irq renaming is hidden in your
patch x86/xen/MSI: Eliminate arch_msix_mask_irq() and arch_msi_mask_irq()

This means that between that patch and this one s390 will not compile.
Could you please move this hunk to the other patch or even make an extra
patch with the renaming. Other than that:

Acked-by: Sebastian Ott seb...@linux.vnet.ibm.com

Regards,
Sebastian

   irq_set_msi_desc(msi-irq, NULL);
   irq_free_desc(msi-irq);
   msi-msg.address_lo = 0;
 @@ -464,6 +464,16 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev)
   airq_iv_free_bit(zpci_aisb_iv, zdev-aisb);
  }
 
 +static struct msi_chip zpci_msi_chip = {
 + .setup_irqs = zpci_setup_msi_irqs,
 + .teardown_irqs = zpci_teardown_msi_irqs,
 +};
 +
 +struct msi_chip *arch_find_msi_chip(struct pci_dev *dev)
 +{
 + return zpci_msi_chip;
 +}
 +
  static void zpci_map_resources(struct zpci_dev *zdev)
  {
   struct pci_dev *pdev = zdev-pdev;
 -- 
 1.7.1
 
 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev