Re: [PATCH kernel] powerpc/iommu: Add simple iommu_ops to report capabilities

2022-07-05 Thread Jason Gunthorpe via iommu
On Tue, Jul 05, 2022 at 04:22:35PM +1000, Alexey Kardashevskiy wrote:

> I have not looked into the domains for ages, what is missing here? With this
> on top of 5.19-rc1 VFIO works again on my POWER9 box. Thanks,

Does this solve all the problems or just coherency? It seems like it
should solve everything now as there will be a IOMMU_DOMAIN_BLOCKED
and the ref logic will succeed to assign it?

> +static struct iommu_domain *spapr_tce_iommu_domain_alloc(unsigned int type)
> +{
> + struct iommu_domain *domain = kzalloc(sizeof(*domain), GFP_KERNEL);
> +
> + if (!domain)
> + return NULL;

This should only succeed if type is IOMMU_DOMAIN_BLOCKED

> +static struct iommu_group *spapr_tce_iommu_device_group(struct device *dev)
> +{
> + struct iommu_group *grp = dev->iommu_group;
> +
> + if (!grp)
> + grp = ERR_PTR(-ENODEV);

It looks like this should just always fail since the code code already
checks iommu_group before calling this? (Arguably ppc should be
refactored to use the normal probe_device and device_group ops to
create groups, but that doesn't seem critical for this.

Thanks
Jason
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH kernel] powerpc/iommu: Add simple iommu_ops to report capabilities

2022-07-05 Thread Robin Murphy

On 2022-07-05 07:22, Alexey Kardashevskiy wrote:

Historically PPC64 managed to avoid using iommu_ops. The VFIO driver
uses a SPAPR TCE sub-driver and all iommu_ops uses were kept in
the Type1 VFIO driver. Recent development though has added a coherency
capability check to the generic part of VFIO and essentially disabled
VFIO on PPC64.

This adds a simple iommu_ops stub which reports support for cache
coherency. Because bus_set_iommu() triggers IOMMU probing of PCI devices,
this provides minimum code for the probing to not crash.


No more bus_set_iommu() please - I'll be sending a new version of this 
series very soon:


https://lore.kernel.org/linux-iommu/cover.1650890638.git.robin.mur...@arm.com/

Cheers,
Robin.


The previous discussion is here:
https://patchwork.ozlabs.org/project/kvm-ppc/patch/20220701061751.1955857-1-...@ozlabs.ru/

Fixes: e8ae0e140c05 ("vfio: Require that devices support DMA cache coherence")
Fixes: 70693f470848 ("vfio: Set DMA ownership for VFIO devices")
Signed-off-by: Alexey Kardashevskiy 
---

I have not looked into the domains for ages, what is missing here? With this
on top of 5.19-rc1 VFIO works again on my POWER9 box. Thanks,

---
  arch/powerpc/include/asm/iommu.h |  2 +
  arch/powerpc/kernel/iommu.c  | 70 
  arch/powerpc/kernel/pci_64.c |  3 ++
  3 files changed, 75 insertions(+)

diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 7e29c73e3dd4..4bdae0ee29d0 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -215,6 +215,8 @@ extern long iommu_tce_xchg_no_kill(struct mm_struct *mm,
enum dma_data_direction *direction);
  extern void iommu_tce_kill(struct iommu_table *tbl,
unsigned long entry, unsigned long pages);
+
+extern const struct iommu_ops spapr_tce_iommu_ops;
  #else
  static inline void iommu_register_group(struct iommu_table_group *table_group,
int pci_domain_number,
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 7e56ddb3e0b9..2205b448f7d5 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -1176,4 +1176,74 @@ void iommu_del_device(struct device *dev)
iommu_group_remove_device(dev);
  }
  EXPORT_SYMBOL_GPL(iommu_del_device);
+
+/*
+ * A simple iommu_ops to allow less cruft in generic VFIO code.
+ */
+static bool spapr_tce_iommu_capable(enum iommu_cap cap)
+{
+   switch (cap) {
+   case IOMMU_CAP_CACHE_COHERENCY:
+   return true;
+   default:
+   break;
+   }
+
+   return false;
+}
+
+static struct iommu_domain *spapr_tce_iommu_domain_alloc(unsigned int type)
+{
+   struct iommu_domain *domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+
+   if (!domain)
+   return NULL;
+
+   domain->geometry.aperture_start = 0;
+   domain->geometry.aperture_end = ~0ULL;
+   domain->geometry.force_aperture = true;
+
+   return domain;
+}
+
+static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev)
+{
+   struct iommu_device *iommu_dev = kzalloc(sizeof(struct iommu_device), 
GFP_KERNEL);
+
+   iommu_dev->dev = dev;
+   iommu_dev->ops = &spapr_tce_iommu_ops;
+
+   return iommu_dev;
+}
+
+static void spapr_tce_iommu_release_device(struct device *dev)
+{
+}
+
+static int spapr_tce_iommu_attach_dev(struct iommu_domain *dom,
+ struct device *dev)
+{
+   return 0;
+}
+
+static struct iommu_group *spapr_tce_iommu_device_group(struct device *dev)
+{
+   struct iommu_group *grp = dev->iommu_group;
+
+   if (!grp)
+   grp = ERR_PTR(-ENODEV);
+   return grp;
+}
+
+const struct iommu_ops spapr_tce_iommu_ops = {
+   .capable = spapr_tce_iommu_capable,
+   .domain_alloc = spapr_tce_iommu_domain_alloc,
+   .probe_device = spapr_tce_iommu_probe_device,
+   .release_device = spapr_tce_iommu_release_device,
+   .device_group = spapr_tce_iommu_device_group,
+   .default_domain_ops = &(const struct iommu_domain_ops) {
+   .attach_dev = spapr_tce_iommu_attach_dev,
+   }
+};
+
  #endif /* CONFIG_IOMMU_API */
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index 19b03ddf5631..04bc0c52e45c 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -20,6 +20,7 @@
  #include 
  #include 
  #include 
+#include 
  
  #include 

  #include 
@@ -27,6 +28,7 @@
  #include 
  #include 
  #include 
+#include 
  
  /* pci_io_base -- the base address from which io bars are offsets.

   * This is the lowest I/O base address (so bar values are always positive),
@@ -69,6 +71,7 @@ static int __init pcibios_init(void)
ppc_md.pcibios_fixup();
  
  	printk(KERN_DEBUG "PCI: Probing PCI hardware done\n");

+   bus_set_iommu(&pci_bus_type, &spapr_tce_iommu_ops);
  
  	return 0;

  }

_

[PATCH kernel] powerpc/iommu: Add simple iommu_ops to report capabilities

2022-07-04 Thread Alexey Kardashevskiy
Historically PPC64 managed to avoid using iommu_ops. The VFIO driver
uses a SPAPR TCE sub-driver and all iommu_ops uses were kept in
the Type1 VFIO driver. Recent development though has added a coherency
capability check to the generic part of VFIO and essentially disabled
VFIO on PPC64.

This adds a simple iommu_ops stub which reports support for cache
coherency. Because bus_set_iommu() triggers IOMMU probing of PCI devices,
this provides minimum code for the probing to not crash.

The previous discussion is here:
https://patchwork.ozlabs.org/project/kvm-ppc/patch/20220701061751.1955857-1-...@ozlabs.ru/

Fixes: e8ae0e140c05 ("vfio: Require that devices support DMA cache coherence")
Fixes: 70693f470848 ("vfio: Set DMA ownership for VFIO devices")
Signed-off-by: Alexey Kardashevskiy 
---

I have not looked into the domains for ages, what is missing here? With this
on top of 5.19-rc1 VFIO works again on my POWER9 box. Thanks,

---
 arch/powerpc/include/asm/iommu.h |  2 +
 arch/powerpc/kernel/iommu.c  | 70 
 arch/powerpc/kernel/pci_64.c |  3 ++
 3 files changed, 75 insertions(+)

diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 7e29c73e3dd4..4bdae0ee29d0 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -215,6 +215,8 @@ extern long iommu_tce_xchg_no_kill(struct mm_struct *mm,
enum dma_data_direction *direction);
 extern void iommu_tce_kill(struct iommu_table *tbl,
unsigned long entry, unsigned long pages);
+
+extern const struct iommu_ops spapr_tce_iommu_ops;
 #else
 static inline void iommu_register_group(struct iommu_table_group *table_group,
int pci_domain_number,
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 7e56ddb3e0b9..2205b448f7d5 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -1176,4 +1176,74 @@ void iommu_del_device(struct device *dev)
iommu_group_remove_device(dev);
 }
 EXPORT_SYMBOL_GPL(iommu_del_device);
+
+/*
+ * A simple iommu_ops to allow less cruft in generic VFIO code.
+ */
+static bool spapr_tce_iommu_capable(enum iommu_cap cap)
+{
+   switch (cap) {
+   case IOMMU_CAP_CACHE_COHERENCY:
+   return true;
+   default:
+   break;
+   }
+
+   return false;
+}
+
+static struct iommu_domain *spapr_tce_iommu_domain_alloc(unsigned int type)
+{
+   struct iommu_domain *domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+
+   if (!domain)
+   return NULL;
+
+   domain->geometry.aperture_start = 0;
+   domain->geometry.aperture_end = ~0ULL;
+   domain->geometry.force_aperture = true;
+
+   return domain;
+}
+
+static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev)
+{
+   struct iommu_device *iommu_dev = kzalloc(sizeof(struct iommu_device), 
GFP_KERNEL);
+
+   iommu_dev->dev = dev;
+   iommu_dev->ops = &spapr_tce_iommu_ops;
+
+   return iommu_dev;
+}
+
+static void spapr_tce_iommu_release_device(struct device *dev)
+{
+}
+
+static int spapr_tce_iommu_attach_dev(struct iommu_domain *dom,
+ struct device *dev)
+{
+   return 0;
+}
+
+static struct iommu_group *spapr_tce_iommu_device_group(struct device *dev)
+{
+   struct iommu_group *grp = dev->iommu_group;
+
+   if (!grp)
+   grp = ERR_PTR(-ENODEV);
+   return grp;
+}
+
+const struct iommu_ops spapr_tce_iommu_ops = {
+   .capable = spapr_tce_iommu_capable,
+   .domain_alloc = spapr_tce_iommu_domain_alloc,
+   .probe_device = spapr_tce_iommu_probe_device,
+   .release_device = spapr_tce_iommu_release_device,
+   .device_group = spapr_tce_iommu_device_group,
+   .default_domain_ops = &(const struct iommu_domain_ops) {
+   .attach_dev = spapr_tce_iommu_attach_dev,
+   }
+};
+
 #endif /* CONFIG_IOMMU_API */
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index 19b03ddf5631..04bc0c52e45c 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -27,6 +28,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /* pci_io_base -- the base address from which io bars are offsets.
  * This is the lowest I/O base address (so bar values are always positive),
@@ -69,6 +71,7 @@ static int __init pcibios_init(void)
ppc_md.pcibios_fixup();
 
printk(KERN_DEBUG "PCI: Probing PCI hardware done\n");
+   bus_set_iommu(&pci_bus_type, &spapr_tce_iommu_ops);
 
return 0;
 }
-- 
2.30.2

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu