Re: [PATCH 13/15] swiotlb: merge swiotlb-xen initialization into swiotlb

2022-03-14 Thread Christoph Hellwig
On Mon, Mar 14, 2022 at 07:07:44PM -0400, Boris Ostrovsky wrote:
>> +swiotlb_init_remap(true, x86_swiotlb_flags, xen_swiotlb_fixup);
>
>
> I think we need to have SWIOTLB_ANY set in x86_swiotlb_flags here.

Yes.

> Notice that we don't do remap() after final update to nslabs. We should.

Indeed.  I've pushed the fixed patches to the git tree and attached
the patches 12, 13 and 14 in case that is easier:
From 6d72b98620281984ae778659cedeb369e82af8d8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig 
Date: Mon, 14 Mar 2022 08:02:57 +0100
Subject: swiotlb: provide swiotlb_init variants that remap the buffer

To share more code between swiotlb and xen-swiotlb, offer a
swiotlb_init_remap interface and add a remap callback to
swiotlb_init_late that will allow Xen to remap the buffer
without duplicating much of the logic.

Signed-off-by: Christoph Hellwig 
---
 arch/x86/pci/sta2x11-fixup.c |  2 +-
 include/linux/swiotlb.h  |  5 +++-
 kernel/dma/swiotlb.c | 36 +++++++++++++++++++++++++++++++++---
 3 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index c7e6faf59a861..7368afc039987 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -57,7 +57,7 @@ static void sta2x11_new_instance(struct pci_dev *pdev)
 		int size = STA2X11_SWIOTLB_SIZE;
 		/* First instance: register your own swiotlb area */
 		dev_info(&pdev->dev, "Using SWIOTLB (size %i)\n", size);
-		if (swiotlb_init_late(size, GFP_DMA))
+		if (swiotlb_init_late(size, GFP_DMA, NULL))
 			dev_emerg(&pdev->dev, "init swiotlb failed\n");
 	}
 	list_add(&instance->list, &sta2x11_instance_list);
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index ee655f2e4d28b..7b50c82f84ce9 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -36,8 +36,11 @@ struct scatterlist;
 
 int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, unsigned int flags);
 unsigned long swiotlb_size_or_default(void);
+void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
+	int (*remap)(void *tlb, unsigned long nslabs));
+int swiotlb_init_late(size_t size, gfp_t gfp_mask,
+	int (*remap)(void *tlb, unsigned long nslabs));
 extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs);
-int swiotlb_init_late(size_t size, gfp_t gfp_mask);
 extern void __init swiotlb_update_mem_attributes(void);
 
 phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys,
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 79641c446d284..c37fd3d1c97f7 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -256,9 +256,11 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs,
  * Statically reserve bounce buffer space and initialize bounce buffer data
  * structures for the software IO TLB used to implement the DMA API.
  */
-void __init swiotlb_init(bool addressing_limit, unsigned int flags)
+void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
+		int (*remap)(void *tlb, unsigned long nslabs))
 {
-	size_t bytes = PAGE_ALIGN(default_nslabs << IO_TLB_SHIFT);
+	unsigned long nslabs = default_nslabs;
+	size_t bytes;
 	void *tlb;
 
 	if (!addressing_limit && !swiotlb_force_bounce)
@@ -271,12 +273,23 @@ void __init swiotlb_init(bool addressing_limit, unsigned int flags)
 	 * allow to pick a location everywhere for hypervisors with guest
 	 * memory encryption.
 	 */
+retry:
+	bytes = PAGE_ALIGN(nslabs << IO_TLB_SHIFT);
 	if (flags & SWIOTLB_ANY)
 		tlb = memblock_alloc(bytes, PAGE_SIZE);
 	else
 		tlb = memblock_alloc_low(bytes, PAGE_SIZE);
 	if (!tlb)
 		goto fail;
+	if (remap && remap(tlb, nslabs) < 0) {
+		memblock_free(tlb, PAGE_ALIGN(bytes));
+
+		if (nslabs <= IO_TLB_MIN_SLABS)
+			panic("%s: Failed to remap %zu bytes\n",
+			  __func__, bytes);
+		nslabs = max(1024UL, ALIGN(nslabs >> 1, IO_TLB_SEGSIZE));
+		goto retry;
+	}
-	if (swiotlb_init_with_tbl(tlb, default_nslabs, flags))
+	if (swiotlb_init_with_tbl(tlb, nslabs, flags))
 		goto fail_free_mem;
 	return;
@@ -287,12 +300,18 @@ void __init swiotlb_init(bool addressing_limit, unsigned int flags)
 	pr_warn("Cannot allocate buffer");
 }
 
+void __init swiotlb_init(bool addressing_limit, unsigned int flags)
+{
+	return swiotlb_init_remap(addressing_limit, flags, NULL);
+}
+
 /*
  * Systems with larger DMA zones (those that don't support ISA) can
  * initialize the swiotlb later using the slab allocator if needed.
  * This should be just like above, but with some error catching.
  */
-int swiotlb_init_late(size_t size, gfp_t gfp_mask)
+int swiotlb_init_late(size_t size, gfp_t gfp_mask,
+		int (*remap)(void *tlb, unsigned long nslabs))
 {
 	unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
 	unsigned long bytes;
@@ -303,6 +322,7 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask)
 	if (swiotlb_force_disable)
 		return 0;
 
+retry:
 	order = get_order(nslabs << IO_TLB_SHIFT);
 	nslabs = SLABS_PER_PAGE << order;
	bytes = nslabs << IO_TLB_SHIFT;

Re: [PATCH 12/15] swiotlb: provide swiotlb_init variants that remap the buffer

2022-03-14 Thread Christoph Hellwig
On Mon, Mar 14, 2022 at 06:39:21PM -0400, Boris Ostrovsky wrote:
> This is IO_TLB_MIN_SLABS, isn't it? (Xen code didn't say so but that's what 
> it meant to say I believe)

Yes, that makes much more sense.  I've switched the patch to use
IO_TLB_MIN_SLABS and drop the 2MB comment in both places.

Can I get a review with that fixed up?

---
From 153085bf3e6e69d676bef0fb96395a86fb8122f5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig 
Date: Mon, 14 Mar 2022 08:02:57 +0100
Subject: swiotlb: provide swiotlb_init variants that remap the buffer

To share more code between swiotlb and xen-swiotlb, offer a
swiotlb_init_remap interface and add a remap callback to
swiotlb_init_late that will allow Xen to remap the buffer
without duplicating much of the logic.

Signed-off-by: Christoph Hellwig 
---
 arch/x86/pci/sta2x11-fixup.c |  2 +-
 include/linux/swiotlb.h  |  5 +++-
 kernel/dma/swiotlb.c | 36 +++++++++++++++++++++++++++++++++---
 3 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index c7e6faf59a861..7368afc039987 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -57,7 +57,7 @@ static void sta2x11_new_instance(struct pci_dev *pdev)
int size = STA2X11_SWIOTLB_SIZE;
/* First instance: register your own swiotlb area */
dev_info(&pdev->dev, "Using SWIOTLB (size %i)\n", size);
-   if (swiotlb_init_late(size, GFP_DMA))
+   if (swiotlb_init_late(size, GFP_DMA, NULL))
dev_emerg(&pdev->dev, "init swiotlb failed\n");
}
list_add(&instance->list, &sta2x11_instance_list);
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index ee655f2e4d28b..7b50c82f84ce9 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -36,8 +36,11 @@ struct scatterlist;
 
 int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, unsigned int flags);
 unsigned long swiotlb_size_or_default(void);
+void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
+   int (*remap)(void *tlb, unsigned long nslabs));
+int swiotlb_init_late(size_t size, gfp_t gfp_mask,
+   int (*remap)(void *tlb, unsigned long nslabs));
 extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs);
-int swiotlb_init_late(size_t size, gfp_t gfp_mask);
 extern void __init swiotlb_update_mem_attributes(void);
 
 phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys,
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 79641c446d284..b3d4f24fb5f5e 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -256,9 +256,11 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs,
  * Statically reserve bounce buffer space and initialize bounce buffer data
  * structures for the software IO TLB used to implement the DMA API.
  */
-void __init swiotlb_init(bool addressing_limit, unsigned int flags)
+void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
+   int (*remap)(void *tlb, unsigned long nslabs))
 {
-   size_t bytes = PAGE_ALIGN(default_nslabs << IO_TLB_SHIFT);
+   unsigned long nslabs = default_nslabs;
+   size_t bytes;
void *tlb;
 
if (!addressing_limit && !swiotlb_force_bounce)
@@ -271,12 +273,23 @@ void __init swiotlb_init(bool addressing_limit, unsigned int flags)
 * allow to pick a location everywhere for hypervisors with guest
 * memory encryption.
 */
+retry:
+   bytes = PAGE_ALIGN(nslabs << IO_TLB_SHIFT);
if (flags & SWIOTLB_ANY)
tlb = memblock_alloc(bytes, PAGE_SIZE);
else
tlb = memblock_alloc_low(bytes, PAGE_SIZE);
if (!tlb)
goto fail;
+   if (remap && remap(tlb, nslabs) < 0) {
+   memblock_free(tlb, PAGE_ALIGN(bytes));
+
+   if (nslabs <= IO_TLB_MIN_SLABS)
+   panic("%s: Failed to remap %zu bytes\n",
+ __func__, bytes);
+   nslabs = max(1024UL, ALIGN(nslabs >> 1, IO_TLB_SEGSIZE));
+   goto retry;
+   }
-   if (swiotlb_init_with_tbl(tlb, default_nslabs, flags))
+   if (swiotlb_init_with_tbl(tlb, nslabs, flags))
goto fail_free_mem;
return;
@@ -287,12 +300,18 @@ void __init swiotlb_init(bool addressing_limit, unsigned int flags)
pr_warn("Cannot allocate buffer");
 }
 
+void __init swiotlb_init(bool addressing_limit, unsigned int flags)
+{
+   return swiotlb_init_remap(addressing_limit, flags, NULL);
+}
+
 /*
  * Systems with larger DMA zones (those that don't support ISA) can
  * initialize the swiotlb later using the slab allocator if needed.
  * This should be just like above, but with some error catching.
  */
-int swiotlb_init_late(size_t size, gfp_t gfp_mask)
+int swiotlb_init_late(size_t size, gfp_t gfp_mask,
+   int (*remap)(void *tlb, unsigned long nslabs))
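
For anyone reading along: under this interface, the remap callback only has
to make the freshly allocated buffer usable for DMA and report failure, so
that swiotlb can retry with a smaller buffer. A minimal sketch, where
my_make_buffer_dma_visible() is a made-up placeholder (xen_swiotlb_fixup in
the next patch is the real user):

/* Sketch of a remap callback for swiotlb_init_remap()/swiotlb_init_late(). */
static int example_swiotlb_remap(void *tlb, unsigned long nslabs)
{
	/*
	 * @tlb is the freshly allocated bounce buffer, @nslabs its size in
	 * IO TLB slabs (nslabs << IO_TLB_SHIFT bytes).  A negative return
	 * makes swiotlb free the buffer and retry with half the slabs.
	 */
	if (my_make_buffer_dma_visible(tlb, nslabs << IO_TLB_SHIFT))
		return -ENOMEM;
	return 0;
}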

[PATCH v2 1/8] iommu: Assign per device max PASID

2022-03-14 Thread Jacob Pan
From: Lu Baolu 

The PCIe spec defines a per-device Max PASID Width.  Since a PASID is only
used with the IOMMU enabled, this patch introduces a PASID max variable on
the per-device IOMMU data. It will be used for limiting PASID allocation,
since the PASID table is per-device.

Signed-off-by: Lu Baolu 
Signed-off-by: Jacob Pan 
---
 drivers/iommu/intel/iommu.c |  4 +++-
 include/linux/iommu.h   | 13 +
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 50666d250b36..881f8361eca2 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2602,8 +2602,10 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
if (sm_supported(iommu)) {
if (pasid_supported(iommu)) {
int features = pci_pasid_features(pdev);
-   if (features >= 0)
+   if (features >= 0) {
info->pasid_supported = features | 1;
+   iommu_set_dev_pasid_max(&pdev->dev, pci_max_pasids(pdev));
+   }
}
 
if (info->ats_supported && ecap_prs(iommu->ecap) &&
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index de0c57a567c8..369f05c2a4e2 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -364,6 +364,7 @@ struct iommu_fault_param {
  * @fwspec: IOMMU fwspec data
  * @iommu_dev:  IOMMU device this device is linked to
  * @priv:   IOMMU Driver private data
+ * @pasid_max:  Max PASID value supported by this device
  *
  * TODO: migrate other per device data pointers under iommu_dev_data, e.g.
  * struct iommu_group  *iommu_group;
@@ -375,8 +376,20 @@ struct dev_iommu {
struct iommu_fwspec *fwspec;
struct iommu_device *iommu_dev;
void*priv;
+   unsigned intpasid_max;
 };
 
+static inline void iommu_set_dev_pasid_max(struct device *dev,
+   unsigned int max)
+{
+   struct dev_iommu *param = dev->iommu;
+
+   if (WARN_ON(!param))
+   return;
+
+   param->pasid_max = max;
+}
+
 int iommu_device_register(struct iommu_device *iommu,
  const struct iommu_ops *ops,
  struct device *hwdev);
-- 
2.25.1



[PATCH v2 4/8] iommu/vt-d: Use device_pasid attach op for RID2PASID

2022-03-14 Thread Jacob Pan
With the availability of a generic device-PASID-domain attachment API,
there's no need to special case RID2PASID.  Use the API to replace
duplicated code.

Signed-off-by: Jacob Pan 
---
 drivers/iommu/intel/iommu.c | 18 ++
 1 file changed, 2 insertions(+), 16 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 9267194eaed3..f832b7599d21 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1683,9 +1683,6 @@ static void domain_flush_piotlb(struct intel_iommu *iommu,
qi_flush_piotlb(iommu, did, domain->default_pasid,
addr, npages, ih);
 
-   if (!list_empty(&domain->devices))
-   qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, npages, ih);
-
if (list_empty(&domain->devices) || xa_empty(&domain->pasids))
return;
 
@@ -2826,17 +2823,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
}
 
/* Setup the PASID entry for requests without PASID: */
-   spin_lock_irqsave(&iommu->lock, flags);
-   if (hw_pass_through && domain_type_is_si(domain))
-   ret = intel_pasid_setup_pass_through(iommu, domain,
-   dev, PASID_RID2PASID);
-   else if (domain_use_first_level(domain))
-   ret = domain_setup_first_level(iommu, domain, dev,
-   PASID_RID2PASID);
-   else
-   ret = intel_pasid_setup_second_level(iommu, domain,
-   dev, PASID_RID2PASID);
-   spin_unlock_irqrestore(&iommu->lock, flags);
+   ret = intel_iommu_attach_dev_pasid(&domain->domain, dev, PASID_RID2PASID);
if (ret) {
dev_err(dev, "Setup RID2PASID failed\n");
dmar_remove_one_dev_info(dev);
@@ -4618,8 +4605,7 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info)
 
if (info->dev && !dev_is_real_dma_subdevice(info->dev)) {
if (dev_is_pci(info->dev) && sm_supported(iommu))
-   intel_pasid_tear_down_entry(iommu, info->dev,
-   PASID_RID2PASID, false);
+   intel_iommu_detach_dev_pasid(&domain->domain, info->dev, PASID_RID2PASID);
 
iommu_disable_dev_iotlb(info);
domain_context_clear(info);
-- 
2.25.1



[PATCH v2 7/8] iommu/vt-d: Delete supervisor/kernel SVA

2022-03-14 Thread Jacob Pan
In-kernel DMA with PASID should use the DMA API now, so remove supervisor
PASID SVA support. Remove the special cases in bind_mm and the page
request service.

Signed-off-by: Jacob Pan 
---
 drivers/iommu/intel/svm.c | 42 ---
 1 file changed, 8 insertions(+), 34 deletions(-)

diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 2c53689da461..37d6218f173b 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -516,11 +516,10 @@ static void intel_svm_free_pasid(struct mm_struct *mm)
 
 static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
   struct device *dev,
-  struct mm_struct *mm,
-  unsigned int flags)
+  struct mm_struct *mm)
 {
struct device_domain_info *info = get_domain_info(dev);
-   unsigned long iflags, sflags;
+   unsigned long iflags, sflags = 0;
struct intel_svm_dev *sdev;
struct intel_svm *svm;
int ret = 0;
@@ -533,16 +532,13 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
 
svm->pasid = mm->pasid;
svm->mm = mm;
-   svm->flags = flags;
INIT_LIST_HEAD_RCU(&svm->devs);
 
-   if (!(flags & SVM_FLAG_SUPERVISOR_MODE)) {
-   svm->notifier.ops = &intel_mmuops;
-   ret = mmu_notifier_register(&svm->notifier, mm);
-   if (ret) {
-   kfree(svm);
-   return ERR_PTR(ret);
-   }
+   svm->notifier.ops = &intel_mmuops;
+   ret = mmu_notifier_register(&svm->notifier, mm);
+   if (ret) {
+   kfree(svm);
+   return ERR_PTR(ret);
}
 
ret = pasid_private_add(svm->pasid, svm);
@@ -583,8 +579,6 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
}
 
/* Setup the pasid table: */
-   sflags = (flags & SVM_FLAG_SUPERVISOR_MODE) ?
-   PASID_FLAG_SUPERVISOR_MODE : 0;
sflags |= cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
spin_lock_irqsave(&iommu->lock, iflags);
ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid,
@@ -957,7 +951,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
 * to unbind the mm while any page faults are outstanding.
 */
svm = pasid_private_find(req->pasid);
-   if (IS_ERR_OR_NULL(svm) || (svm->flags & SVM_FLAG_SUPERVISOR_MODE))
+   if (IS_ERR_OR_NULL(svm))
goto bad_req;
}
 
@@ -1011,29 +1005,9 @@ static irqreturn_t prq_event_thread(int irq, void *d)
 struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
 {
struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
-   unsigned int flags = 0;
struct iommu_sva *sva;
int ret;
 
-   if (drvdata)
-   flags = *(unsigned int *)drvdata;
-
-   if (flags & SVM_FLAG_SUPERVISOR_MODE) {
-   if (!ecap_srs(iommu->ecap)) {
-   dev_err(dev, "%s: Supervisor PASID not supported\n",
-   iommu->name);
-   return ERR_PTR(-EOPNOTSUPP);
-   }
-
-   if (mm) {
-   dev_err(dev, "%s: Supervisor PASID with user provided mm\n",
-   iommu->name);
-   return ERR_PTR(-EINVAL);
-   }
-
-   mm = &init_mm;
-   }
-
mutex_lock(&pasid_mutex);
ret = intel_svm_alloc_pasid(dev, mm, flags);
if (ret) {
-- 
2.25.1



[PATCH v2 6/8] dmaengine: idxd: Use DMA API for in-kernel DMA with PASID

2022-03-14 Thread Jacob Pan
The current in-kernel supervisor PASID support is based on the SVM/SVA
machinery in SVA lib. The binding between a kernel PASID and kernel
mapping has many flaws. See discussions in the link below.

This patch enables in-kernel DMA by switching from SVA lib to the
standard DMA mapping APIs. Since both DMA requests with and without
PASIDs are mapped identically, there is no change to how DMA APIs are
used after the kernel PASID is enabled.

Link: https://lore.kernel.org/linux-iommu/20210511194726.gp1002...@nvidia.com/
Signed-off-by: Jacob Pan 
---
 drivers/dma/idxd/idxd.h  |  1 -
 drivers/dma/idxd/init.c  | 34 +-
 drivers/dma/idxd/sysfs.c |  7 ---
 3 files changed, 9 insertions(+), 33 deletions(-)

diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index da72eb15f610..a09ab4a6e1c1 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -276,7 +276,6 @@ struct idxd_device {
struct idxd_wq **wqs;
struct idxd_engine **engines;
 
-   struct iommu_sva *sva;
unsigned int pasid;
 
int num_groups;
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 08a5f4310188..5d1f8dd4abf6 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include "../dmaengine.h"
@@ -466,36 +467,22 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_d
 
 static int idxd_enable_system_pasid(struct idxd_device *idxd)
 {
-   int flags;
-   unsigned int pasid;
-   struct iommu_sva *sva;
+   u32 pasid;
+   int ret;
 
-   flags = SVM_FLAG_SUPERVISOR_MODE;
-
-   sva = iommu_sva_bind_device(&idxd->pdev->dev, NULL, &flags);
-   if (IS_ERR(sva)) {
-   dev_warn(&idxd->pdev->dev,
-"iommu sva bind failed: %ld\n", PTR_ERR(sva));
-   return PTR_ERR(sva);
-   }
-
-   pasid = iommu_sva_get_pasid(sva);
-   if (pasid == IOMMU_PASID_INVALID) {
-   iommu_sva_unbind_device(sva);
-   return -ENODEV;
+   ret = iommu_enable_pasid_dma(&idxd->pdev->dev, &pasid);
+   if (ret) {
+   dev_err(&idxd->pdev->dev, "No DMA PASID %d\n", ret);
+   return ret;
}
-
-   idxd->sva = sva;
idxd->pasid = pasid;
-   dev_dbg(&idxd->pdev->dev, "system pasid: %u\n", pasid);
+
return 0;
 }
 
 static void idxd_disable_system_pasid(struct idxd_device *idxd)
 {
-
-   iommu_sva_unbind_device(idxd->sva);
-   idxd->sva = NULL;
+   iommu_disable_pasid_dma(&idxd->pdev->dev, idxd->pasid);
 }
 
 static int idxd_probe(struct idxd_device *idxd)
@@ -524,10 +511,7 @@ static int idxd_probe(struct idxd_device *idxd)
} else {
dev_warn(dev, "Unable to turn on SVA feature.\n");
}
-   } else if (!sva) {
-   dev_warn(dev, "User forced SVA off via module param.\n");
}
-
idxd_read_caps(idxd);
idxd_read_table_offsets(idxd);
 
diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
index 7e19ab92b61a..fde6656695ba 100644
--- a/drivers/dma/idxd/sysfs.c
+++ b/drivers/dma/idxd/sysfs.c
@@ -839,13 +839,6 @@ static ssize_t wq_name_store(struct device *dev,
if (strlen(buf) > WQ_NAME_SIZE || strlen(buf) == 0)
return -EINVAL;
 
-   /*
-* This is temporarily placed here until we have SVM support for
-* dmaengine.
-*/
-   if (wq->type == IDXD_WQT_KERNEL && device_pasid_enabled(wq->idxd))
-   return -EOPNOTSUPP;
-
memset(wq->name, 0, WQ_NAME_SIZE + 1);
strncpy(wq->name, buf, WQ_NAME_SIZE);
strreplace(wq->name, '\n', '\0');
-- 
2.25.1



[PATCH v2 9/9] dmaengine: idxd: separate user and kernel pasid enabling

2022-03-14 Thread Jacob Pan
From: Dave Jiang 

The idxd driver has always gated pasid enabling under a single knob, but
this assumption is incorrect. The pasid used for kernel operation can be
independently toggled and has no dependency on the user pasid (and vice
versa). Split the two so they are independent "enabled" flags.

Signed-off-by: Dave Jiang 
Signed-off-by: Jacob Pan 
---
 drivers/dma/idxd/cdev.c |  4 ++--
 drivers/dma/idxd/idxd.h |  6 ++
 drivers/dma/idxd/init.c | 30 ++
 3 files changed, 26 insertions(+), 14 deletions(-)

diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
index 312ec37ebf91..addaebca7683 100644
--- a/drivers/dma/idxd/cdev.c
+++ b/drivers/dma/idxd/cdev.c
@@ -99,7 +99,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp)
ctx->wq = wq;
filp->private_data = ctx;
 
-   if (device_pasid_enabled(idxd)) {
+   if (device_user_pasid_enabled(idxd)) {
sva = iommu_sva_bind_device(dev, current->mm);
if (IS_ERR(sva)) {
rc = PTR_ERR(sva);
@@ -152,7 +152,7 @@ static int idxd_cdev_release(struct inode *node, struct file *filep)
if (wq_shared(wq)) {
idxd_device_drain_pasid(idxd, ctx->pasid);
} else {
-   if (device_pasid_enabled(idxd)) {
+   if (device_user_pasid_enabled(idxd)) {
/* The wq disable in the disable pasid function will drain the wq */
rc = idxd_wq_disable_pasid(wq);
if (rc < 0)
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index a09ab4a6e1c1..190b08bd7c08 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -239,6 +239,7 @@ enum idxd_device_flag {
IDXD_FLAG_CONFIGURABLE = 0,
IDXD_FLAG_CMD_RUNNING,
IDXD_FLAG_PASID_ENABLED,
+   IDXD_FLAG_USER_PASID_ENABLED,
 };
 
 struct idxd_dma_dev {
@@ -468,6 +469,11 @@ static inline bool device_pasid_enabled(struct idxd_device *idxd)
return test_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags);
 }
 
+static inline bool device_user_pasid_enabled(struct idxd_device *idxd)
+{
+   return test_bit(IDXD_FLAG_USER_PASID_ENABLED, &idxd->flags);
+}
+
 static inline bool device_swq_supported(struct idxd_device *idxd)
 {
return (support_enqcmd && device_pasid_enabled(idxd));
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 5d1f8dd4abf6..981150b7d09b 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -500,16 +500,19 @@ static int idxd_probe(struct idxd_device *idxd)
 
if (IS_ENABLED(CONFIG_INTEL_IDXD_SVM) && sva) {
rc = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA);
-   if (rc == 0) {
-   rc = idxd_enable_system_pasid(idxd);
-   if (rc < 0) {
-   iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
-   dev_warn(dev, "Failed to enable PASID. No SVA support: %d\n", rc);
-   } else {
-   set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags);
-   }
-   } else {
+   if (rc) {
+   /*
+* Do not bail here since legacy DMA is still
+* supported, both user and in-kernel DMA with
+* PASID rely on SVA feature.
+*/
dev_warn(dev, "Unable to turn on SVA feature.\n");
+   } else {
+   set_bit(IDXD_FLAG_USER_PASID_ENABLED, &idxd->flags);
+   if (idxd_enable_system_pasid(idxd))
+   dev_warn(dev, "No in-kernel DMA with PASID.\n");
+   else
+   set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags);
}
}
idxd_read_caps(idxd);
@@ -545,7 +548,8 @@ static int idxd_probe(struct idxd_device *idxd)
  err:
if (device_pasid_enabled(idxd))
idxd_disable_system_pasid(idxd);
-   iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
+   if (device_user_pasid_enabled(idxd) || device_pasid_enabled(idxd))
+   iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
return rc;
 }
 
@@ -558,7 +562,8 @@ static void idxd_cleanup(struct idxd_device *idxd)
idxd_cleanup_internals(idxd);
if (device_pasid_enabled(idxd))
idxd_disable_system_pasid(idxd);
-   iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
+   if (device_user_pasid_enabled(idxd) || device_pasid_enabled(idxd))
+   iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
 }
 
 static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
@@ -677,7 +682,8 @@ static void idxd_remove(struct pci_dev *pdev)
free_irq(irq_entry->vector, irq_entry);
pci_free_irq_vectors(pdev);

[PATCH v2 3/8] iommu/vt-d: Implement device_pasid domain attach ops

2022-03-14 Thread Jacob Pan
On VT-d platforms with scalable mode enabled, devices that issue DMA
requests with PASID need to attach to the correct IOMMU domains.
The attach operation involves the following:
- programming the PASID into device's PASID table
- tracking device domain and the PASID relationship
- managing IOTLB and device TLB invalidations

This patch extends DMAR domain and device domain info with xarrays to
track per domain and per device PASIDs.  It provides the flexibility to
be used beyond DMA API PASID support.

Signed-off-by: Lu Baolu 
Signed-off-by: Jacob Pan 
---
 drivers/iommu/intel/iommu.c | 194 +++-
 include/linux/intel-iommu.h |  12 ++-
 2 files changed, 202 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 881f8361eca2..9267194eaed3 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1622,20 +1622,48 @@ static void __iommu_flush_dev_iotlb(struct device_domain_info *info,
   qdep, addr, mask);
 }
 
+static void __iommu_flush_dev_piotlb(struct device_domain_info *info,
+   u64 address, ioasid_t pasid, unsigned int mask)
+{
+   u16 sid, qdep;
+
+   if (!info || !info->ats_enabled)
+   return;
+
+   sid = info->bus << 8 | info->devfn;
+   qdep = info->ats_qdep;
+   qi_flush_dev_iotlb_pasid(info->iommu, sid, info->pfsid,
+pasid, qdep, address, mask);
+}
+
 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
  u64 addr, unsigned mask)
 {
unsigned long flags;
struct device_domain_info *info;
struct subdev_domain_info *sinfo;
+   unsigned long pasid;
+   struct pasid_info *pinfo;
 
if (!domain->has_iotlb_device)
return;
 
spin_lock_irqsave(&device_domain_lock, flags);
-   list_for_each_entry(info, &domain->devices, link)
-   __iommu_flush_dev_iotlb(info, addr, mask);
-
+   list_for_each_entry(info, &domain->devices, link) {
+   /*
+* We cannot use PASID-based devTLB invalidation on RID2PASID.
+* The device does not understand RID2PASID/0. This is different
+* than IOTLB invalidation where RID2PASID is also used for
+* tagging.
+*/
+   xa_for_each(&info->pasids, pasid, pinfo) {
+   if (!pasid)
+   __iommu_flush_dev_iotlb(info, addr, mask);
+   else
+   __iommu_flush_dev_piotlb(info, addr, pasid, mask);
+   }
+   }
list_for_each_entry(sinfo, &domain->subdevices, link_domain) {
info = get_domain_info(sinfo->pdev);
__iommu_flush_dev_iotlb(info, addr, mask);
@@ -1648,6 +1676,8 @@ static void domain_flush_piotlb(struct intel_iommu *iommu,
u64 addr, unsigned long npages, bool ih)
 {
u16 did = domain->iommu_did[iommu->seq_id];
+   struct pasid_info *pinfo;
+   unsigned long pasid;
 
if (domain->default_pasid)
qi_flush_piotlb(iommu, did, domain->default_pasid,
@@ -1655,6 +1685,21 @@ static void domain_flush_piotlb(struct intel_iommu *iommu,
 
if (!list_empty(&domain->devices))
qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, npages, ih);
+
+   if (list_empty(&domain->devices) || xa_empty(&domain->pasids))
+   return;
+
+   /*
+* Flush IOTLBs for all the PASIDs attached to this domain, RID2PASID
+* included.
+* TODO: If there are many PASIDs, we may resort to flush with
+* domain ID which may have performance benefits due to fewer
+* invalidation descriptors. VM exits may be reduced when running on
+* vIOMMU. The current use cases utilize no more than 2 PASIDs per
+* device, i.e. RID2PASID and a kernel DMA API PASID.
+*/
+   xa_for_each(&domain->pasids, pasid, pinfo)
+   qi_flush_piotlb(iommu, did, pasid, addr, npages, ih);
 }
 
 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
@@ -1902,6 +1947,7 @@ static struct dmar_domain *alloc_domain(unsigned int type)
domain->has_iotlb_device = false;
INIT_LIST_HEAD(&domain->devices);
INIT_LIST_HEAD(&domain->subdevices);
+   xa_init(&domain->pasids);
 
return domain;
 }
@@ -2556,6 +2602,144 @@ static bool dev_is_real_dma_subdevice(struct device *dev)
   pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev);
 }
 
+
+static bool is_device_domain_attached(struct dmar_domain *dmar_domain,
+ struct device *dev)
+{
+   struct device_domain_info *info;
+
+   list_for_each_entry(info, &dmar_domain->devices, link) {
+   if (info->dev == dev)
+   

[PATCH v2 8/8] iommu: Remove unused driver data in sva_bind_device

2022-03-14 Thread Jacob Pan
No one is using drvdata for sva_bind_device after kernel SVA support was
removed from the VT-d driver. Remove the drvdata parameter as well.

Signed-off-by: Jacob Pan 
---
 drivers/dma/idxd/cdev.c | 2 +-
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 2 +-
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 5 ++---
 drivers/iommu/intel/svm.c   | 9 -
 drivers/iommu/iommu.c   | 4 ++--
 drivers/misc/uacce/uacce.c  | 2 +-
 include/linux/intel-iommu.h | 3 +--
 include/linux/iommu.h   | 9 +++--
 8 files changed, 15 insertions(+), 21 deletions(-)

diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
index b9b2b4a4124e..312ec37ebf91 100644
--- a/drivers/dma/idxd/cdev.c
+++ b/drivers/dma/idxd/cdev.c
@@ -100,7 +100,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp)
filp->private_data = ctx;
 
if (device_pasid_enabled(idxd)) {
-   sva = iommu_sva_bind_device(dev, current->mm, NULL);
+   sva = iommu_sva_bind_device(dev, current->mm);
if (IS_ERR(sva)) {
rc = PTR_ERR(sva);
dev_err(dev, "pasid allocation failed: %d\n", rc);
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index a737ba5f727e..eb2f5cb0701a 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -354,7 +354,7 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
 }
 
 struct iommu_sva *
-arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
+arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
 {
struct iommu_sva *handle;
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index cd48590ada30..d2ba86470c42 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -754,8 +754,7 @@ bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master);
 int arm_smmu_master_enable_sva(struct arm_smmu_master *master);
 int arm_smmu_master_disable_sva(struct arm_smmu_master *master);
 bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master);
-struct iommu_sva *arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm,
-   void *drvdata);
+struct iommu_sva *arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm);
 void arm_smmu_sva_unbind(struct iommu_sva *handle);
 u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle);
 void arm_smmu_sva_notifier_synchronize(void);
@@ -791,7 +790,7 @@ static inline bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master
 }
 
 static inline struct iommu_sva *
-arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
+arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
 {
return ERR_PTR(-ENODEV);
 }
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 37d6218f173b..94deb58375f5 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -500,8 +500,7 @@ int intel_svm_unbind_gpasid(struct device *dev, u32 pasid)
return ret;
 }
 
-static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm,
-unsigned int flags)
+static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm)
 {
ioasid_t max_pasid = dev_is_pci(dev) ?
pci_max_pasids(to_pci_dev(dev)) : intel_pasid_max_id;
@@ -1002,20 +1001,20 @@ static irqreturn_t prq_event_thread(int irq, void *d)
return IRQ_RETVAL(handled);
 }
 
-struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
+struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm)
 {
struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
struct iommu_sva *sva;
int ret;
 
mutex_lock(&pasid_mutex);
-   ret = intel_svm_alloc_pasid(dev, mm, flags);
+   ret = intel_svm_alloc_pasid(dev, mm);
if (ret) {
mutex_unlock(&pasid_mutex);
return ERR_PTR(ret);
}
 
-   sva = intel_svm_bind_mm(iommu, dev, mm, flags);
+   sva = intel_svm_bind_mm(iommu, dev, mm);
if (IS_ERR_OR_NULL(sva))
intel_svm_free_pasid(mm);
mutex_unlock(&pasid_mutex);
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 107dcf5938d6..fef34879bc0c 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -3049,7 +3049,7 @@ EXPORT_SYMBOL_GPL(iommu_aux_get_pasid);
  * On error, returns an ERR_PTR value.
  */
 struct iommu_sva *
-iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
+iommu_sva_bind_device(struct device *dev, struct mm_struct *mm)

[PATCH v2 2/8] iommu: Add attach/detach_dev_pasid domain ops

2022-03-14 Thread Jacob Pan
From: Lu Baolu 

An IOMMU domain represents an address space which can be attached by
devices that perform DMA within a domain. However, for platforms with
PASID capability, the domain attachment needs to be handled at the
device+PASID level. There can be multiple PASIDs within a device and
multiple devices attached to a given domain.
This patch introduces a new IOMMU op which supports device, PASID, and
IOMMU domain attachment. The immediate use case is for PASID capable
devices to perform DMA under DMA APIs.

Signed-off-by: Lu Baolu 
Signed-off-by: Jacob Pan 
---
 include/linux/iommu.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 369f05c2a4e2..fde5b933dbe3 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -227,6 +227,8 @@ struct iommu_iotlb_gather {
  * @aux_get_pasid: get the pasid given an aux-domain
  * @sva_bind: Bind process address space to device
  * @sva_unbind: Unbind process address space from device
+ * @attach_dev_pasid: attach an iommu domain to a pasid of device
+ * @detach_dev_pasid: detach an iommu domain from a pasid of device
  * @sva_get_pasid: Get PASID associated to a SVA handle
  * @page_response: handle page request response
  * @cache_invalidate: invalidate translation caches
@@ -296,6 +298,10 @@ struct iommu_ops {
struct iommu_sva *(*sva_bind)(struct device *dev, struct mm_struct *mm,
  void *drvdata);
void (*sva_unbind)(struct iommu_sva *handle);
+   int (*attach_dev_pasid)(struct iommu_domain *domain,
+   struct device *dev, ioasid_t id);
+   void (*detach_dev_pasid)(struct iommu_domain *domain,
+struct device *dev, ioasid_t id);
u32 (*sva_get_pasid)(struct iommu_sva *handle);
 
int (*page_response)(struct device *dev,
-- 
2.25.1
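
As a rough illustration, a driver implementing these ops would wire them up
like below (sketch only, with made-up names; the real VT-d implementation
follows in patch 3/8):

/* Sketch: an IOMMU driver fills in the two new ops alongside its others. */
static int example_attach_dev_pasid(struct iommu_domain *domain,
				    struct device *dev, ioasid_t pasid)
{
	/*
	 * Program @pasid in the device's PASID table to translate through
	 * @domain, and record the device/PASID pair for TLB invalidation.
	 */
	return 0;
}

static void example_detach_dev_pasid(struct iommu_domain *domain,
				     struct device *dev, ioasid_t pasid)
{
	/* Tear down the PASID table entry and flush IOTLB/devTLB for @pasid. */
}

static const struct iommu_ops example_iommu_ops = {
	/* ... existing ops ... */
	.attach_dev_pasid	= example_attach_dev_pasid,
	.detach_dev_pasid	= example_detach_dev_pasid,
};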



[PATCH v2 5/8] iommu: Add PASID support for DMA mapping API users

2022-03-14 Thread Jacob Pan
The DMA mapping API is the de facto standard for in-kernel DMA. It operates
on a per device/RID basis, which is not PASID-aware.

For some modern devices, such as the Intel Data Streaming Accelerator, a
PASID is required for certain work submissions. To allow such devices to
use the DMA mapping API, we need the following functionalities:
1. Provide device a way to retrieve a PASID for work submission within
the kernel
2. Enable the kernel PASID on the IOMMU for the device
3. Attach the kernel PASID to the device's default DMA domain, be it
IOVA or physical address in the case of pass-through.

This patch introduces a driver-facing API that enables DMA API
PASID usage. Once enabled, device drivers can continue to use DMA APIs as
is. There is no difference in dma_handle between without PASID and with
PASID.

Signed-off-by: Jacob Pan 
---
 drivers/iommu/dma-iommu.c | 65 +++
 include/linux/dma-iommu.h |  7 +
 include/linux/iommu.h |  9 ++
 3 files changed, 81 insertions(+)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index b22034975301..d0ff1a34b1b6 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -39,6 +39,8 @@ enum iommu_dma_cookie_type {
IOMMU_DMA_MSI_COOKIE,
 };
 
+static DECLARE_IOASID_SET(iommu_dma_pasid);
+
 struct iommu_dma_cookie {
enum iommu_dma_cookie_type  type;
union {
@@ -370,6 +372,69 @@ void iommu_put_dma_cookie(struct iommu_domain *domain)
domain->iova_cookie = NULL;
 }
 
+/**
+ * iommu_enable_pasid_dma - Enable in-kernel DMA requests with PASID
+ * @dev:   Device to be enabled
+ * @pasid: Storage for the allocated kernel PASID
+ *
+ * DMA requests with PASID will be mapped the same way as legacy DMA.
+ * If the device is in pass-through, the PASID will also pass through. If the
+ * device is in IOVA map, the supervisor PASID will point to the same IOVA
+ * page table.
+ *
+ * @return 0 on success, with the kernel PASID stored in @pasid, or an
+ * error code on failure
+ */
+int iommu_enable_pasid_dma(struct device *dev, ioasid_t *pasid)
+{
+   struct iommu_domain *dom;
+   ioasid_t id, max;
+   int ret;
+
+   dom = iommu_get_domain_for_dev(dev);
+   if (!dom || !dom->ops || !dom->ops->attach_dev_pasid)
+   return -ENODEV;
+   max = iommu_get_dev_pasid_max(dev);
+   if (!max)
+   return -EINVAL;
+
+   id = ioasid_alloc(&iommu_dma_pasid, 1, max, dev);
+   if (id == INVALID_IOASID)
+   return -ENOMEM;
+
+   ret = dom->ops->attach_dev_pasid(dom, dev, id);
+   if (ret) {
+   ioasid_put(id);
+   return ret;
+   }
+   *pasid = id;
+
+   return ret;
+}
+EXPORT_SYMBOL(iommu_enable_pasid_dma);
+
+/**
+ * iommu_disable_pasid_dma - Disable in-kernel DMA requests with PASID
+ * @dev:   Device whose PASID DMA is to be disabled
+ * @pasid: Kernel PASID to be released
+ *
+ * It is the device driver's responsibility to ensure no more incoming DMA
+ * requests with the kernel PASID before calling this function. The IOMMU
+ * driver ensures the PASID cache and IOTLBs related to the kernel PASID
+ * are cleared and drained.
+ */
+void iommu_disable_pasid_dma(struct device *dev, ioasid_t pasid)
+{
+   struct iommu_domain *dom;
+
+   /* TODO: check the given PASID is within the ioasid_set */
+   dom = iommu_get_domain_for_dev(dev);
+   if (!dom->ops->detach_dev_pasid)
+   return;
+   dom->ops->detach_dev_pasid(dom, dev, pasid);
+   ioasid_put(pasid);
+}
+EXPORT_SYMBOL(iommu_disable_pasid_dma);
+
 /**
  * iommu_dma_get_resv_regions - Reserved region driver helper
  * @dev: Device from iommu_get_resv_regions()
diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index 24607dc3c2ac..e6cb9b52a420 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -18,6 +18,13 @@ int iommu_get_dma_cookie(struct iommu_domain *domain);
 int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base);
 void iommu_put_dma_cookie(struct iommu_domain *domain);
 
+/*
+ * For devices that can do DMA request with PASID, setup a system PASID.
+ * Address modes (IOVA, PA) are selected by the platform code.
+ */
+int iommu_enable_pasid_dma(struct device *dev, ioasid_t *pasid);
+void iommu_disable_pasid_dma(struct device *dev, ioasid_t pasid);
+
 /* Setup call for arch DMA mapping code */
 void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit);
 int iommu_dma_init_fq(struct iommu_domain *domain);
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index fde5b933dbe3..fb011722e4f8 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -395,6 +395,15 @@ static inline void iommu_set_dev_pasid_max(struct device *dev,
 
param->pasid_max = max;
 }
+static inline ioasid_t iommu_get_dev_pasid_max(struct device *dev)
+{
+   struct dev_iommu *param = dev->iommu;
+
+   if (WARN_ON(!param))
+   return 0;
+
+   return param->pasid_max;
+}
 
int iommu_device_register(struct iommu_device *iommu,

[PATCH v2 0/8] Enable PASID for DMA API users

2022-03-14 Thread Jacob Pan
Some modern accelerators, such as Intel's Data Streaming Accelerator (DSA),
require a PASID in DMA requests to be operational. Specifically, the work
submissions with ENQCMD on shared work queues require PASIDs. The use cases
include both user DMA with shared virtual addressing (SVA) and in-kernel
DMA similar to legacy DMA w/o PASID. Here we address the latter.

The DMA mapping API is the de facto standard for in-kernel DMA. However, it
operates on a per device or Requester ID (RID) basis, which is not
PASID-aware. To leverage the DMA API for devices that rely on PASIDs, this
patchset introduces the following APIs

1. A driver facing API that enables DMA API PASID usage:
iommu_enable_pasid_dma(struct device *dev, ioasid_t *pasid);

2. An IOMMU op that allows attaching device-domain-PASID generically (will
be used beyond DMA API PASID support)

Once PASID DMA is enabled and attached to the appropriate IOMMU domain,
device drivers can continue to use DMA APIs as-is. There is no difference
in terms of mapping in dma_handle between without PASID and with PASID.
The DMA mapping performed by the IOMMU will be identical for both kinds of
requests, be it IOVA or PA in the case of pass-through.
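
For illustration, a device driver would then use the API roughly as follows
(a sketch based on the signatures above; the work-submission step is device
specific and only hinted at in a comment):

#include <linux/dma-iommu.h>
#include <linux/dma-mapping.h>

static int example_setup_dma_pasid(struct device *dev)
{
	dma_addr_t dma_handle;
	ioasid_t pasid;
	void *buf;
	int ret;

	/* Allocate a kernel PASID attached to the device's default DMA domain. */
	ret = iommu_enable_pasid_dma(dev, &pasid);
	if (ret)
		return ret;

	/* DMA API usage is unchanged; dma_handle is the same with or without PASID. */
	buf = dma_alloc_coherent(dev, PAGE_SIZE, &dma_handle, GFP_KERNEL);
	if (!buf) {
		iommu_disable_pasid_dma(dev, pasid);
		return -ENOMEM;
	}

	/* Tag work submissions with @pasid (device specific, e.g. via ENQCMD). */
	return 0;
}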

In addition, this set converts DSA driver in-kernel DMA with PASID from SVA
lib to DMA API. There have been security and functional issues with the
kernel SVA approach:
(https://lore.kernel.org/linux-iommu/20210511194726.gp1002...@nvidia.com/)
The highlights are as the following:
 - The lack of IOTLB synchronization upon kernel page table updates.
   (vmalloc, module/BPF loading, CONFIG_DEBUG_PAGEALLOC etc.)
 - Other than slightly more protection, using kernel virtual addresses (KVA)
has little advantage over physical addresses. There are also no use cases yet
where DMA engines need kernel virtual addresses for in-kernel DMA.

Subsequently, cleanup is done around the usage of sva_bind_device() for
in-kernel DMA: removing special-casing code in the VT-d driver and
tightening the SVA lib API.

This work and idea behind it is a collaboration with many people, many
thanks to Baolu Lu, Jason Gunthorpe, Dave Jiang, and others.


ChangeLog:
v2
- Do not reserve a special PASID for DMA API usage. Use IOASID
  allocation instead.
- Introduced a generic device-pasid-domain attachment IOMMU op.
  Replaced the DMA API only IOMMU op.
- Removed supervisor SVA support in VT-d
- Removed unused sva_bind_device parameters
- Use IOMMU specific data instead of struct device to store PASID
  info

Jacob Pan (6):
  iommu/vt-d: Implement device_pasid domain attach ops
  iommu/vt-d: Use device_pasid attach op for RID2PASID
  iommu: Add PASID support for DMA mapping API users
  dmaengine: idxd: Use DMA API for in-kernel DMA with PASID
  iommu/vt-d: Delete supervisor/kernel SVA
  iommu: Remove unused driver data in sva_bind_device

Lu Baolu (2):
  iommu: Assign per device max PASID
  iommu: Add attach/detach_dev_pasid domain ops

 drivers/dma/idxd/cdev.c   |   2 +-
 drivers/dma/idxd/idxd.h   |   1 -
 drivers/dma/idxd/init.c   |  34 +--
 drivers/dma/idxd/sysfs.c  |   7 -
 .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c   |   2 +-
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   |   5 +-
 drivers/iommu/dma-iommu.c |  65 ++
 drivers/iommu/intel/iommu.c   | 214 --
 drivers/iommu/intel/svm.c |  51 +
 drivers/iommu/iommu.c |   4 +-
 drivers/misc/uacce/uacce.c|   2 +-
 include/linux/dma-iommu.h |   7 +
 include/linux/intel-iommu.h   |  15 +-
 include/linux/iommu.h |  37 ++-
 14 files changed, 338 insertions(+), 108 deletions(-)

-- 
2.25.1



Re: [PATCH v8 00/11] Fix BUG_ON in vfio_iommu_group_notifier()

2022-03-14 Thread Jason Gunthorpe via iommu
On Tue, Mar 08, 2022 at 01:44:10PM +0800, Lu Baolu wrote:
> Hi folks,
> 
> The iommu group is the minimal isolation boundary for DMA. Devices in
> a group can access each other's MMIO registers via peer to peer DMA
> and also need to share the same I/O address space.

Joerg, are we good for the coming v5.18 merge window now? There are
several things backed up behind this series.

Thanks,
Jason


Re: [PATCH 13/15] swiotlb: merge swiotlb-xen initialization into swiotlb

2022-03-14 Thread Stefano Stabellini
On Mon, 14 Mar 2022, Christoph Hellwig wrote:
> Reuse the generic swiotlb initialization for xen-swiotlb.  For ARM/ARM64
> this works trivially, while for x86 xen_swiotlb_fixup needs to be passed
> as the remap argument to swiotlb_init_remap/swiotlb_init_late.
> 
> Signed-off-by: Christoph Hellwig 

For arch/arm/xen and drivers/xen/swiotlb-xen.c

Reviewed-by: Stefano Stabellini 


Re: [PATCH v4 15/32] vfio: introduce KVM-owned IOMMU type

2022-03-14 Thread Jason Gunthorpe via iommu
On Mon, Mar 14, 2022 at 04:50:33PM -0600, Alex Williamson wrote:

> > +/*
> > + * The KVM_IOMMU type implies that the hypervisor will control the mappings
> > + * rather than userspace
> > + */
> > +#define VFIO_KVM_IOMMU 11
> 
> Then why is this hosted in the type1 code that exposes a wide variety
> of userspace interfaces?  Thanks,

It is really badly named, this is the root level of a 2 stage nested
IO page table, and this approach needed a special flag to distinguish
the setup from the normal iommu_domain.

If we do try to stick this into VFIO it should probably use the
VFIO_TYPE1_NESTING_IOMMU instead - however, we would like to delete
that flag entirely as it was never fully implemented, was never used,
and isn't part of what we are proposing for IOMMU nesting on ARM
anyhow. (So far I've found nobody to explain what the plan here was..)

This is why I said the second level should be an explicit iommu_domain
all on its own that is explicitly coupled to the KVM to read the page
tables, if necessary.

But I'm not sure that reading the userspace io page tables with KVM is
even the best thing to do - the iommu driver already has the pinned
memory, it would be faster and more modular to traverse the io page
tables through the pfns in the root iommu_domain than by having KVM do
the translations. Lets see what Matthew says..

Jason


Re: [PATCH 14/15] swiotlb: remove swiotlb_init_with_tbl and swiotlb_init_late_with_tbl

2022-03-14 Thread Boris Ostrovsky



On 3/14/22 3:31 AM, Christoph Hellwig wrote:

@@ -314,6 +293,7 @@ void __init swiotlb_init(bool addressing_limit, unsigned int flags)
  int swiotlb_init_late(size_t size, gfp_t gfp_mask,
int (*remap)(void *tlb, unsigned long nslabs))
  {
+   struct io_tlb_mem *mem = &io_tlb_default_mem;
unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
unsigned long bytes;
unsigned char *vstart = NULL;
@@ -355,33 +335,28 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
(PAGE_SIZE << order) >> 20);
nslabs = SLABS_PER_PAGE << order;
}
-   rc = swiotlb_late_init_with_tbl(vstart, nslabs);
-   if (rc)
-   free_pages((unsigned long)vstart, order);
-
-   return rc;
-}
-
-int
-swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
-{
-   struct io_tlb_mem *mem = &io_tlb_default_mem;
-   unsigned long bytes = nslabs << IO_TLB_SHIFT;
  
-	if (swiotlb_force_disable)
-		return 0;
+	if (remap)
+		rc = remap(vstart, nslabs);
+	if (rc) {
+		free_pages((unsigned long)vstart, order);
 
-	/* protect against double initialization */
-	if (WARN_ON_ONCE(mem->nslabs))
-		return -ENOMEM;
+		/* Min is 2MB */
+		if (nslabs <= 1024)
+			return rc;
+		nslabs = max(1024UL, ALIGN(nslabs >> 1, IO_TLB_SEGSIZE));
+		goto retry;
+	}



We now end up with two attempts to remap. I think this second one is what we
want, since it solves the problem I pointed out in the previous patch.


-boris




Re: [PATCH 13/15] swiotlb: merge swiotlb-xen initialization into swiotlb

2022-03-14 Thread Boris Ostrovsky


On 3/14/22 3:31 AM, Christoph Hellwig wrote:

-
  static void __init pci_xen_swiotlb_init(void)
  {
if (!xen_initial_domain() && !x86_swiotlb_enable)
return;
x86_swiotlb_enable = true;
-   xen_swiotlb = true;
-   xen_swiotlb_init_early();
+   swiotlb_init_remap(true, x86_swiotlb_flags, xen_swiotlb_fixup);



I think we need to have SWIOTLB_ANY set in x86_swiotlb_flags here.




dma_ops = &xen_swiotlb_dma_ops;
if (IS_ENABLED(CONFIG_PCI))
pci_request_acs();
@@ -88,14 +85,16 @@ static void __init pci_xen_swiotlb_init(void)
  
 int pci_xen_swiotlb_init_late(void)
 {
-	int rc;
-
-	if (xen_swiotlb)
+	if (dma_ops == &xen_swiotlb_dma_ops)
 		return 0;
 
-	rc = xen_swiotlb_init();
-	if (rc)
-		return rc;
+	/* we can work with the default swiotlb */
+	if (!io_tlb_default_mem.nslabs) {
+		int rc = swiotlb_init_late(swiotlb_size_or_default(),
+					   GFP_KERNEL, xen_swiotlb_fixup);



This may be a comment for the previous patch, but looking at swiotlb_init_late():


retry:
    order = get_order(nslabs << IO_TLB_SHIFT);
    nslabs = SLABS_PER_PAGE << order;
    bytes = nslabs << IO_TLB_SHIFT;

    while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
    vstart = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
  order);
    if (vstart)
    break;
    order--;
    }

    if (!vstart)
    return -ENOMEM;
    if (remap)
    rc = remap(vstart, nslabs);
    if (rc) {
    free_pages((unsigned long)vstart, order);

    /* Min is 2MB */
    if (nslabs <= 1024)
    return rc;
    nslabs = max(1024UL, ALIGN(nslabs >> 1, IO_TLB_SEGSIZE));
    goto retry;
    }

    if (order != get_order(bytes)) {
    pr_warn("only able to allocate %ld MB\n",
    (PAGE_SIZE << order) >> 20);
    nslabs = SLABS_PER_PAGE << order; <===
    }

    rc = swiotlb_late_init_with_tbl(vstart, nslabs);

Notice that we don't do remap() after final update to nslabs. We should.
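
E.g. something along these lines, reusing the retry logic already in the
patch (just a sketch of the idea, not a final fix):

    if (order != get_order(bytes)) {
            pr_warn("only able to allocate %ld MB\n",
                    (PAGE_SIZE << order) >> 20);
            nslabs = SLABS_PER_PAGE << order;
            /* nslabs changed after remap() ran above, so remap again */
            if (remap && remap(vstart, nslabs)) {
                    free_pages((unsigned long)vstart, order);
                    nslabs = max(1024UL, ALIGN(nslabs >> 1, IO_TLB_SEGSIZE));
                    goto retry;
            }
    }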



-boris

Re: [PATCH v4 15/32] vfio: introduce KVM-owned IOMMU type

2022-03-14 Thread Alex Williamson
On Mon, 14 Mar 2022 15:44:34 -0400
Matthew Rosato  wrote:

> s390x will introduce a new IOMMU domain type where the mappings are
> managed by KVM rather than in response to userspace mapping ioctls.  Allow
> for specifying this type on the VFIO_SET_IOMMU ioctl and triggering the
> appropriate iommu interface for overriding the default domain.
> 
> Signed-off-by: Matthew Rosato 
> ---
>  drivers/vfio/vfio_iommu_type1.c | 12 +++-
>  include/uapi/linux/vfio.h   |  6 ++
>  2 files changed, 17 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 9394aa9444c1..0bec97077d61 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -77,6 +77,7 @@ struct vfio_iommu {
>   boolnesting;
>   booldirty_page_tracking;
>   boolcontainer_open;
> + boolkvm;
>   struct list_heademulated_iommu_groups;
>  };
>  
> @@ -2203,7 +2204,12 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
>   goto out_free_group;
>  
>   ret = -EIO;
> - domain->domain = iommu_domain_alloc(bus);
> +
> + if (iommu->kvm)
> + domain->domain = iommu_domain_alloc_type(bus, IOMMU_DOMAIN_KVM);
> + else
> + domain->domain = iommu_domain_alloc(bus);
> +
>   if (!domain->domain)
>   goto out_free_domain;
>  
> @@ -2552,6 +2558,9 @@ static void *vfio_iommu_type1_open(unsigned long arg)
>   case VFIO_TYPE1v2_IOMMU:
>   iommu->v2 = true;
>   break;
> + case VFIO_KVM_IOMMU:
> + iommu->kvm = true;
> + break;
>   default:
>   kfree(iommu);
>   return ERR_PTR(-EINVAL);
> @@ -2637,6 +2646,7 @@ static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu,
>   case VFIO_TYPE1_NESTING_IOMMU:
>   case VFIO_UNMAP_ALL:
>   case VFIO_UPDATE_VADDR:
> + case VFIO_KVM_IOMMU:
>   return 1;
>   case VFIO_DMA_CC_IOMMU:
>   if (!iommu)
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index ef33ea002b0b..666edb6957ac 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -52,6 +52,12 @@
>  /* Supports the vaddr flag for DMA map and unmap */
>  #define VFIO_UPDATE_VADDR10
>  
> +/*
> + * The KVM_IOMMU type implies that the hypervisor will control the mappings
> + * rather than userspace
> + */
> +#define VFIO_KVM_IOMMU   11

Then why is this hosted in the type1 code that exposes a wide variety
of userspace interfaces?  Thanks,

Alex



Re: [PATCH 12/15] swiotlb: provide swiotlb_init variants that remap the buffer

2022-03-14 Thread Boris Ostrovsky



On 3/14/22 3:31 AM, Christoph Hellwig wrote:

-void __init swiotlb_init(bool addressing_limit, unsigned int flags)
+void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
+   int (*remap)(void *tlb, unsigned long nslabs))
  {
-   size_t bytes = PAGE_ALIGN(default_nslabs << IO_TLB_SHIFT);
+   unsigned long nslabs = default_nslabs;
+   size_t bytes;
void *tlb;
  
  	if (!addressing_limit && !swiotlb_force_bounce)

@@ -271,12 +273,23 @@ void __init swiotlb_init(bool addressing_limit, unsigned int flags)
 * allow to pick a location everywhere for hypervisors with guest
 * memory encryption.
 */
+retry:
+   bytes = PAGE_ALIGN(default_nslabs << IO_TLB_SHIFT);
if (flags & SWIOTLB_ANY)
tlb = memblock_alloc(bytes, PAGE_SIZE);
else
tlb = memblock_alloc_low(bytes, PAGE_SIZE);
if (!tlb)
goto fail;
+   if (remap && remap(tlb, nslabs) < 0) {
+   memblock_free(tlb, PAGE_ALIGN(bytes));
+
+   /* Min is 2MB */
+   if (nslabs <= 1024)



This is IO_TLB_MIN_SLABS, isn't it? (Xen code didn't say so but that's what it 
meant to say I believe)



+   panic("%s: Failed to remap %zu bytes\n",
+ __func__, bytes);
+   nslabs = max(1024UL, ALIGN(nslabs >> 1, IO_TLB_SEGSIZE));
+   goto retry;
+   }

@@ -303,6 +323,7 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask)
if (swiotlb_force_disable)
return 0;
  
+retry:

order = get_order(nslabs << IO_TLB_SHIFT);
nslabs = SLABS_PER_PAGE << order;
bytes = nslabs << IO_TLB_SHIFT;
@@ -317,6 +338,17 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask)
  
 	if (!vstart)
 		return -ENOMEM;
+   if (remap)
+   rc = remap(vstart, nslabs);
+   if (rc) {
+   free_pages((unsigned long)vstart, order);
+
+   /* Min is 2MB */
+   if (nslabs <= 1024)



Same here.


-boris



+   return rc;
+   nslabs = max(1024UL, ALIGN(nslabs >> 1, IO_TLB_SEGSIZE));
+   goto retry;
+   }
  
 	if (order != get_order(bytes)) {
 		pr_warn("only able to allocate %ld MB\n",



Re: [PATCH v4 29/32] vfio-pci/zdev: add DTSM to clp group capability

2022-03-14 Thread Jason Gunthorpe via iommu
On Mon, Mar 14, 2022 at 03:44:48PM -0400, Matthew Rosato wrote:
> The DTSM, or designation type supported mask, indicates what IOAT formats
> are available to the guest.  For an interpreted device, userspace will not
> know what format(s) the IOAT assist supports, so pass it via the
> capability chain.  Since the value belongs to the Query PCI Function Group
> clp, let's extend the existing capability with a new version.

Why is this on the VFIO device?

Maybe I don't quite understand it right, but the IOAT is the
'userspace page table'?

That is something that should be modeled as a nested iommu domain.

Querying the formats and any control logic for this should be on the
iommu side not built into VFIO.

Jason


Re: [PATCH v4 22/32] KVM: s390: pci: routines for (dis)associating zPCI devices with a KVM

2022-03-14 Thread Jason Gunthorpe via iommu
On Mon, Mar 14, 2022 at 03:44:41PM -0400, Matthew Rosato wrote:
> +int kvm_s390_pci_zpci_start(struct kvm *kvm, struct zpci_dev *zdev)
> +{
> + struct vfio_device *vdev;
> + struct pci_dev *pdev;
> + int rc;
> +
> + rc = kvm_s390_pci_dev_open(zdev);
> + if (rc)
> + return rc;
> +
> + pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
> + if (!pdev) {
> + rc = -ENODEV;
> + goto exit_err;
> + }
> +
> + vdev = get_vdev(&pdev->dev);
> + if (!vdev) {
> + pci_dev_put(pdev);
> + rc = -ENODEV;
> + goto exit_err;
> + }
> +
> + zdev->kzdev->nb.notifier_call = kvm_s390_pci_group_notifier;
> +
> + /*
> +  * At this point, a KVM should already be associated with this device,
> +  * so registering the notifier now should immediately trigger the
> +  * event.  We also want to know if the KVM association is later removed
> +  * to ensure proper cleanup happens.
> +  */
> + rc = register_notifier(vdev->dev, &zdev->kzdev->nb);
> +
> + put_vdev(vdev);
> + pci_dev_put(pdev);
> +
> + /* Make sure the registered KVM matches the KVM issuing the ioctl */
> + if (rc || zdev->kzdev->kvm != kvm) {
> + rc = -ENODEV;
> + goto exit_err;
> + }
> +
> + /* Must support KVM-managed IOMMU to proceed */
> + if (IS_ENABLED(CONFIG_S390_KVM_IOMMU))
> + rc = zpci_iommu_attach_kvm(zdev, kvm);
> + else
> + rc = -EINVAL;

This seems like kind of a strange API, shouldn't kvm be getting a
reference on the underlying iommu_domain and then calling into it to
get the mapping table instead of pushing KVM specific logic into the
iommu driver?

It would be nice if all the special kvm stuff could be more isolated in
kvm code.

I'm still a little unclear about why this is so complicated - can't
you get the iommu_domain from the group FD directly in KVM code as
power does?

Jason
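
A minimal sketch of the direction suggested above, using only existing
IOMMU API (iommu_get_domain_for_dev() and iommu_map()); the helper name
and its wiring into the RPCIT path are assumptions, not code from this
series:

#include <linux/iommu.h>
#include <linux/pci.h>

/*
 * Hypothetical helper: KVM resolves the domain itself instead of the
 * IOMMU driver holding a struct kvm.
 */
static int kvm_zpci_map_range(struct pci_dev *pdev, unsigned long iova,
			      phys_addr_t paddr, size_t size)
{
	struct iommu_domain *dom = iommu_get_domain_for_dev(&pdev->dev);

	if (!dom)
		return -ENODEV;

	return iommu_map(dom, iova, paddr, size, IOMMU_READ | IOMMU_WRITE);
}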


Re: [PATCH v4 15/32] vfio: introduce KVM-owned IOMMU type

2022-03-14 Thread Jason Gunthorpe via iommu
On Mon, Mar 14, 2022 at 03:44:34PM -0400, Matthew Rosato wrote:

> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 9394aa9444c1..0bec97077d61 100644
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -77,6 +77,7 @@ struct vfio_iommu {
>   boolnesting;
>   booldirty_page_tracking;
>   boolcontainer_open;
> + boolkvm;
>   struct list_heademulated_iommu_groups;
>  };
>  
> @@ -2203,7 +2204,12 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
>   goto out_free_group;
>  
>   ret = -EIO;
> - domain->domain = iommu_domain_alloc(bus);
> +
> + if (iommu->kvm)
> + domain->domain = iommu_domain_alloc_type(bus, IOMMU_DOMAIN_KVM);
> + else
> + domain->domain = iommu_domain_alloc(bus);
> +
>   if (!domain->domain)
>   goto out_free_domain;
>  
> @@ -2552,6 +2558,9 @@ static void *vfio_iommu_type1_open(unsigned long arg)
>   case VFIO_TYPE1v2_IOMMU:
>   iommu->v2 = true;
>   break;
> + case VFIO_KVM_IOMMU:
> + iommu->kvm = true;
> + break;

Same remark for this - but more - this is called KVM but it doesn't
accept a kvm FD or any thing else to link the domain to the KVM
in-use.

Jason


Re: [PATCH v4 14/32] iommu: introduce iommu_domain_alloc_type and the KVM type

2022-03-14 Thread Jason Gunthorpe via iommu
On Mon, Mar 14, 2022 at 03:44:33PM -0400, Matthew Rosato wrote:
> s390x will introduce an additional domain type that is used for
> managing IOMMU owned by KVM.  Define the type here and add an
> interface for allocating a specified type vs the default type.
> 
> Signed-off-by: Matthew Rosato 
> ---
>  drivers/iommu/iommu.c |  7 +++
>  include/linux/iommu.h | 12 
>  2 files changed, 19 insertions(+)

I think the general idea is right, but I'm not keen on this as an
interface at all.

We are trying to build in iommufd a generic interface for an IOMMU
driver to expose IOMMU-device-specific domains such as this in a
general purpose way so all the platforms can get what they need.

Jason


Re: [PATCH v4 00/32] KVM: s390: enable zPCI for interpretive execution

2022-03-14 Thread Matthew Rosato

On 3/14/22 3:44 PM, Matthew Rosato wrote:

Note: A few patches in this series are dependent on Baolu's IOMMU domain ops
split, which is currently in the next branch of linux-iommu. This series
applies on top:
https://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git

Enable interpretive execution of zPCI instructions + adapter interruption
forwarding for s390x KVM vfio-pci.  This is done by introducing a new IOMMU
domain for s390x (KVM-managed), indicating via vfio that this IOMMU domain
should be used instead of the default, with subsequent management of the
hardware assists being handled via a new KVM ioctl for zPCI management.

By allowing interpretation of zPCI instructions and firmware delivery of
interrupts to guests, we can significantly reduce the frequency of guest
SIE exits for zPCI.  We then see additional gains by handling a hot-path
instruction that can still intercept to the hypervisor (RPCIT) directly
in kvm via the new IOMMU domain, whose map operations update the host
DMA table with pinned guest entries over the specified range.

From the perspective of guest configuration, you pass through zPCI devices
in the same manner as before, with interpretation support being used by
default if available in kernel+qemu.

Will reply with a link to the associated QEMU series.


QEMU series:
https://lore.kernel.org/kvm/20220314194920.5-1-mjros...@linux.ibm.com/



[PATCH v4 32/32] MAINTAINERS: update s390 IOMMU entry

2022-03-14 Thread Matthew Rosato
Use wildcard to pick up new parts added by KVM domain support.

Signed-off-by: Matthew Rosato 
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 6c76eb66b10a..d803f490eafb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16867,7 +16867,7 @@ M:  Gerald Schaefer 
 L: linux-s...@vger.kernel.org
 S: Supported
 W: http://www.ibm.com/developerworks/linux/linux390/
-F: drivers/iommu/s390-iommu.c
+F: drivers/iommu/s390*
 
 S390 IUCV NETWORK LAYER
 M: Alexandra Winter 
-- 
2.27.0



[PATCH v4 31/32] MAINTAINERS: additional files related kvm s390 pci passthrough

2022-03-14 Thread Matthew Rosato
Add entries from the s390 kvm subdirectory related to pci passthrough.

Acked-by: Christian Borntraeger 
Signed-off-by: Matthew Rosato 
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index e127c2fb08a7..6c76eb66b10a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16928,6 +16928,8 @@ M:  Eric Farman 
 L: linux-s...@vger.kernel.org
 L: k...@vger.kernel.org
 S: Supported
+F: arch/s390/include/asm/kvm_pci.h
+F: arch/s390/kvm/pci*
 F: drivers/vfio/pci/vfio_pci_zdev.c
 F: include/uapi/linux/vfio_zdev.h
 
-- 
2.27.0



[PATCH v4 30/32] KVM: s390: introduce CPU feature for zPCI Interpretation

2022-03-14 Thread Matthew Rosato
KVM_S390_VM_CPU_FEAT_ZPCI_INTERP relays whether zPCI interpretive
execution is possible based on the available hardware facilities.

Signed-off-by: Matthew Rosato 
---
 arch/s390/include/uapi/asm/kvm.h | 1 +
 arch/s390/kvm/kvm-s390.c | 6 ++
 2 files changed, 7 insertions(+)

diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 7a6b14874d65..ed06458a871f 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -130,6 +130,7 @@ struct kvm_s390_vm_cpu_machine {
 #define KVM_S390_VM_CPU_FEAT_PFMFI 11
 #define KVM_S390_VM_CPU_FEAT_SIGPIF12
 #define KVM_S390_VM_CPU_FEAT_KSS   13
+#define KVM_S390_VM_CPU_FEAT_ZPCI_INTERP 14
 struct kvm_s390_vm_cpu_feat {
__u64 feat[16];
 };
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 613101ba29be..137ab8c09b82 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -434,6 +434,12 @@ static void kvm_s390_cpu_feat_init(void)
if (test_facility(151)) /* DFLTCC */
__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
 
+   /* zPCI Interpretation */
+   if (IS_ENABLED(CONFIG_VFIO_PCI) && IS_ENABLED(CONFIG_S390_KVM_IOMMU) &&
+   test_facility(69) && test_facility(70) && test_facility(71) &&
+   test_facility(72))
+   allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ZPCI_INTERP);
+
if (MACHINE_HAS_ESOP)
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
/*
-- 
2.27.0



[PATCH v4 29/32] vfio-pci/zdev: add DTSM to clp group capability

2022-03-14 Thread Matthew Rosato
The DTSM, or designation type supported mask, indicates what IOAT formats
are available to the guest.  For an interpreted device, userspace will not
know what format(s) the IOAT assist supports, so pass it via the
capability chain.  Since the value belongs to the Query PCI Function Group
clp, let's extend the existing capability with a new version.

Reviewed-by: Pierre Morel 
Signed-off-by: Matthew Rosato 
---
 drivers/vfio/pci/vfio_pci_zdev.c | 12 ++--
 include/uapi/linux/vfio_zdev.h   |  3 +++
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci_zdev.c b/drivers/vfio/pci/vfio_pci_zdev.c
index 4a653ce480c7..aadd2b58b822 100644
--- a/drivers/vfio/pci/vfio_pci_zdev.c
+++ b/drivers/vfio/pci/vfio_pci_zdev.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
static int zpci_group_cap(struct zpci_dev *zdev, struct vfio_info_cap *caps)
 {
struct vfio_device_info_cap_zpci_group cap = {
.header.id = VFIO_DEVICE_INFO_CAP_ZPCI_GROUP,
-   .header.version = 1,
+   .header.version = 2,
.dasm = zdev->dma_mask,
.msi_addr = zdev->msi_addr,
.flags = VFIO_DEVICE_INFO_ZPCI_FLAG_REFRESH,
.mui = zdev->fmb_update,
.noi = zdev->max_msi,
.maxstbl = ZPCI_MAX_WRITE_SIZE,
-   .version = zdev->version
+   .version = zdev->version,
+   .dtsm = 0
};
 
+   /* Some values are different for interpreted devices */
+   if (zdev->kzdev) {
+   cap.maxstbl = zdev->maxstbl;
+   cap.dtsm = kvm_s390_pci_get_dtsm(zdev);
+   }
+
return vfio_info_add_capability(caps, &cap.header, sizeof(cap));
 }
 
diff --git a/include/uapi/linux/vfio_zdev.h b/include/uapi/linux/vfio_zdev.h
index 78c022af3d29..29351687e914 100644
--- a/include/uapi/linux/vfio_zdev.h
+++ b/include/uapi/linux/vfio_zdev.h
@@ -50,6 +50,9 @@ struct vfio_device_info_cap_zpci_group {
__u16 noi;  /* Maximum number of MSIs */
__u16 maxstbl;  /* Maximum Store Block Length */
__u8 version;   /* Supported PCI Version */
+   /* End of version 1 */
+   __u8 dtsm;  /* Supported IOAT Designations */
+   /* End of version 2 */
 };
 
 /**
-- 
2.27.0
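
A hedged userspace sketch of consuming the versioned capability added
above: walk the VFIO capability chain from cap_offset (offsets are
relative to the start of the vfio_device_info buffer) and read dtsm
only when the capability reports version 2 or later; error handling is
elided:

#include <linux/vfio.h>
#include <linux/vfio_zdev.h>

static __u8 zpci_group_dtsm(void *info_buf, __u32 cap_offset)
{
	while (cap_offset) {
		struct vfio_info_cap_header *hdr =
			(void *)((char *)info_buf + cap_offset);

		if (hdr->id == VFIO_DEVICE_INFO_CAP_ZPCI_GROUP) {
			struct vfio_device_info_cap_zpci_group *cap =
				(void *)hdr;

			/* dtsm only exists from version 2 on */
			return hdr->version >= 2 ? cap->dtsm : 0;
		}
		cap_offset = hdr->next;
	}

	return 0;
}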



[PATCH v4 28/32] KVM: s390: add KVM_S390_ZPCI_OP to manage guest zPCI devices

2022-03-14 Thread Matthew Rosato
The KVM_S390_ZPCI_OP ioctl provides a series of operations that
can be invoked to manage hardware-assisted virtualization features
for s390x PCI passthrough.

Signed-off-by: Matthew Rosato 
---
 Documentation/virt/kvm/api.rst | 60 ++
 arch/s390/kvm/kvm-s390.c   | 26 
 arch/s390/kvm/pci.c| 77 ++
 arch/s390/kvm/pci.h|  3 +-
 include/uapi/linux/kvm.h   | 43 +++
 5 files changed, 208 insertions(+), 1 deletion(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 9f3172376ec3..c642ff891cf2 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -5574,6 +5574,66 @@ enabled with ``arch_prctl()``, but this may change in the future.
 The offsets of the state save areas in struct kvm_xsave follow the contents
 of CPUID leaf 0xD on the host.
 
+4.134 KVM_S390_ZPCI_OP
+
+
+:Capability: KVM_CAP_S390_ZPCI_OP
+:Architectures: s390
+:Type: vm ioctl
+:Parameters: struct kvm_s390_zpci_op (in, out)
+:Returns: 0 on success, <0 on error
+
+Used to manage hardware-assisted virtualization features for zPCI devices.
+
+Parameters are specified via the following structure::
+
+  struct kvm_s390_zpci_op {
+   /* in */
+   __u32 fh;   /* target device */
+   __u8  op;   /* operation to perform */
+   __u8  pad[3];
+   union {
+   /* for KVM_S390_ZPCIOP_REG_INT */
+   struct {
+   __u64 ibv;  /* Guest addr of interrupt bit vector */
+   __u64 sb;   /* Guest addr of summary bit */
+   __u32 flags;
+   __u32 noi;  /* Number of interrupts */
+   __u8 isc;   /* Guest interrupt subclass */
+   __u8 sbo;   /* Offset of guest summary bit vector */
+   __u16 pad;
+   } reg_int;
+   /* for KVM_S390_ZPCIOP_REG_IOAT */
+   struct {
+   __u64 iota; /* I/O Translation settings */
+   } reg_ioat;
+   __u8 reserved[64];
+   } u;
+   /* out */
+   __u32 newfh;/* updated device handle */
+  };
+
+The type of operation is specified in the "op" field.
+KVM_S390_ZPCIOP_INIT is used to associate a zPCI function with this VM.
+Conversely, KVM_S390_ZPCIOP_END is used to terminate that association.
+KVM_S390_ZPCIOP_START_INTERP is used to enable interpretive execution
+for the specified zPCI function for this VM; KVM_S390_ZPCIOP_STOP_INTERP
+is used to subsequently disable interpretive execution.
+KVM_S390_ZPCIOP_REG_INT is used to register the VM for adapter interruption
+forwarding, which will allow firmware delivery of interrupts directly to
+the vm, with KVM providing a backup delivery mechanism;
+KVM_S390_ZPCIOP_DEREG_INT is used to subsequently disable interrupt forwarding.
+KVM_S390_ZPCIOP_REG_IOAT is used to enable KVM-managed IOMMU ops to begin
+synchronizing guest and host DMA tables; KVM_S390_ZPCIOP_DEREG_IOAT is used
+to subsequently disable IOMMU mapping.
+
+The target zPCI function must also be specified via the "fh" field.  For the
+KVM_S390_ZPCIOP_REG_INT operation, additional information to establish the
+interrupt forwarding must be provided via the "reg_int" struct.  For the
+KVM_S390_ZPCIOP_REG_IOAT operation, guest table format and location must be
+specified via the "reg_ioat" struct.
+
+The "reserved" field is meant for future extensions.
 
 5. The kvm_run structure
 
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 84acaf59a7d3..613101ba29be 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -616,6 +616,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_PROTECTED:
r = is_prot_virt_host();
break;
+   case KVM_CAP_S390_ZPCI_OP:
+   if (IS_ENABLED(CONFIG_S390_KVM_IOMMU) && test_facility(69) &&
+   test_facility(70) && test_facility(71) &&
+   test_facility(72)) {
+   r = 1;
+   } else {
+   r = 0;
+   }
+   break;
default:
r = 0;
}
@@ -2532,6 +2541,23 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
break;
}
+   case KVM_S390_ZPCI_OP: {
+   struct kvm_s390_zpci_op args;
+
+   r = -EINVAL;
+   if (!IS_ENABLED(CONFIG_VFIO_PCI))
+   break;
+   if (copy_from_user(&args, argp, sizeof(args))) {
+   r = -EFAULT;
+   break;
+   }
+   r = kvm_s390_pci_zpci_op(kvm, &args);
+   if (r)
+   break;
+   if (copy_to_user(a
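
A hedged userspace sketch of driving the ioctl documented above; vm_fd
and fh are placeholders supplied by the caller, and only names defined
by this patch are used:

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Enable zPCI Load/Store interpretation for one function via the VM fd */
static int zpci_start_interp(int vm_fd, __u32 fh)
{
	struct kvm_s390_zpci_op op;

	memset(&op, 0, sizeof(op));
	op.fh = fh;
	op.op = KVM_S390_ZPCIOP_START_INTERP;

	return ioctl(vm_fd, KVM_S390_ZPCI_OP, &op);
}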

[PATCH v4 27/32] KVM: s390: intercept the rpcit instruction

2022-03-14 Thread Matthew Rosato
For faster handling of PCI translation refreshes, intercept in KVM
and call the associated handler.

Signed-off-by: Matthew Rosato 
---
 arch/s390/kvm/priv.c | 46 
 1 file changed, 46 insertions(+)

diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 417154b314a6..546c99a0e0b6 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -29,6 +29,7 @@
 #include 
 #include "gaccess.h"
 #include "kvm-s390.h"
+#include "pci.h"
 #include "trace.h"
 
 static int handle_ri(struct kvm_vcpu *vcpu)
@@ -335,6 +336,49 @@ static int handle_rrbe(struct kvm_vcpu *vcpu)
return 0;
 }
 
+static int handle_rpcit(struct kvm_vcpu *vcpu)
+{
+   int reg1, reg2;
+   int rc;
+
+   if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+   return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+   /* KVM can only handle if we support KVM-managed IOMMU */
+   if (!IS_ENABLED(CONFIG_S390_KVM_IOMMU))
+   return -EOPNOTSUPP;
+
+   kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+   /* If the device has a SHM bit on, let userspace take care of this */
+   if (((vcpu->run->s.regs.gprs[reg1] >> 32) & aift->mdd) != 0)
+   return -EOPNOTSUPP;
+
+   rc = kvm_s390_pci_refresh_trans(vcpu, vcpu->run->s.regs.gprs[reg1],
+   vcpu->run->s.regs.gprs[reg2],
+   vcpu->run->s.regs.gprs[reg2 + 1]);
+
+   switch (rc) {
+   case 0:
+   kvm_s390_set_psw_cc(vcpu, 0);
+   break;
+   case -ENOMEM:
+   vcpu->run->s.regs.gprs[reg1] &= 0x00ffUL;
+   vcpu->run->s.regs.gprs[reg1] |= (u64)4 << 24;
+   kvm_s390_set_psw_cc(vcpu, 1);
+   break;
+   case -EIO:
+   vcpu->run->s.regs.gprs[reg1] &= 0x00ffUL;
+   vcpu->run->s.regs.gprs[reg1] |= (u64)16 << 24;
+   kvm_s390_set_psw_cc(vcpu, 1);
+   break;
+   default:
+   kvm_s390_set_psw_cc(vcpu, 3);
+   }
+
+   return 0;
+}
+
 #define SSKE_NQ 0x8
 #define SSKE_MR 0x4
 #define SSKE_MC 0x2
@@ -1275,6 +1319,8 @@ int kvm_s390_handle_b9(struct kvm_vcpu *vcpu)
return handle_essa(vcpu);
case 0xaf:
return handle_pfmf(vcpu);
+   case 0xd3:
+   return handle_rpcit(vcpu);
default:
return -EOPNOTSUPP;
}
-- 
2.27.0



[PATCH v4 26/32] KVM: s390: pci: handle refresh of PCI translations

2022-03-14 Thread Matthew Rosato
Add a routine that will perform a shadow operation between a guest
and host IOAT.  A subsequent patch will invoke this in response to
an 04 RPCIT instruction intercept.

Signed-off-by: Matthew Rosato 
---
 arch/s390/include/asm/kvm_pci.h |  1 +
 arch/s390/kvm/pci.c | 31 ++-
 arch/s390/kvm/pci.h |  3 +++
 3 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/arch/s390/include/asm/kvm_pci.h b/arch/s390/include/asm/kvm_pci.h
index e27dbede723c..9578b5dafb45 100644
--- a/arch/s390/include/asm/kvm_pci.h
+++ b/arch/s390/include/asm/kvm_pci.h
@@ -25,6 +25,7 @@ struct kvm_zdev {
struct zpci_fib fib;
struct notifier_block nb;
struct list_head entry;
+   u64 rpcit_count;
 };
 
 int kvm_s390_pci_dev_open(struct zpci_dev *zdev);
diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
index 1a8b82220b29..40d2fadbfbd5 100644
--- a/arch/s390/kvm/pci.c
+++ b/arch/s390/kvm/pci.c
@@ -8,6 +8,7 @@
  */
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -173,6 +174,30 @@ int kvm_s390_pci_aen_init(u8 nisc)
return rc;
 }
 
+int kvm_s390_pci_refresh_trans(struct kvm_vcpu *vcpu, unsigned long req,
+  unsigned long start, unsigned long size)
+{
+   struct kvm_zdev *kzdev;
+   u32 fh = req >> 32;
+   int rc;
+
+   /* Make sure this is a valid device associated with this guest */
+   kzdev = get_kzdev_by_fh(vcpu->kvm, fh);
+   if (!kzdev)
+   return -EINVAL;
+
+   /*
+* The KVM-managed IOMMU map operation will synchronize the associated
+* guest IOAT tables with the host DMA tables.  A physical address is
+* not specified as it will be derived from pinned guest PTEs
+*/
+   rc = iommu_map(kzdev->dom, start, 0, size, IOMMU_WRITE | IOMMU_READ);
+
+   kzdev->rpcit_count++;
+
+   return rc;
+}
+
 /* Modify PCI: Register floating adapter interruption forwarding */
 static int kvm_zpci_set_airq(struct zpci_dev *zdev)
 {
@@ -716,6 +741,8 @@ void kvm_s390_pci_clear_list(struct kvm *kvm)
 
 int kvm_s390_pci_init(void)
 {
+   int rc;
+
aift = kzalloc(sizeof(struct zpci_aift), GFP_KERNEL);
if (!aift)
return -ENOMEM;
@@ -723,5 +750,7 @@ int kvm_s390_pci_init(void)
spin_lock_init(&aift->gait_lock);
mutex_init(&aift->aift_lock);
 
-   return 0;
+   rc = zpci_get_mdd(&aift->mdd);
+
+   return rc;
 }
diff --git a/arch/s390/kvm/pci.h b/arch/s390/kvm/pci.h
index 867f04cae3a1..2cb1b27396c1 100644
--- a/arch/s390/kvm/pci.h
+++ b/arch/s390/kvm/pci.h
@@ -33,6 +33,7 @@ struct zpci_aift {
struct kvm_zdev **kzdev;
spinlock_t gait_lock; /* Protects the gait, used during AEN forward */
struct mutex aift_lock; /* Protects the other structures in aift */
+   u32 mdd;
 };
 
 extern struct zpci_aift *aift;
@@ -48,6 +49,8 @@ static inline struct kvm *kvm_s390_pci_si_to_kvm(struct zpci_aift *aift,
 
 int kvm_s390_pci_aen_init(u8 nisc);
 void kvm_s390_pci_aen_exit(void);
+int kvm_s390_pci_refresh_trans(struct kvm_vcpu *vcpu, unsigned long req,
+  unsigned long start, unsigned long end);
 
 int kvm_s390_pci_zpci_start(struct kvm *kvm, struct zpci_dev *zdev);
 int kvm_s390_pci_zpci_stop(struct kvm *kvm, struct zpci_dev *zdev);
-- 
2.27.0



[PATCH v4 25/32] KVM: s390: pci: provide routines for enabling/disabling IOAT assist

2022-03-14 Thread Matthew Rosato
These routines will be wired into a kvm ioctl in order to respond to
requests to enable / disable a device for PCI I/O Address Translation
assistance via a KVM-managed IOMMU.

Signed-off-by: Matthew Rosato 
---
 arch/s390/include/asm/kvm_pci.h |  2 ++
 arch/s390/kvm/pci.c | 25 +
 arch/s390/kvm/pci.h |  2 ++
 3 files changed, 29 insertions(+)

diff --git a/arch/s390/include/asm/kvm_pci.h b/arch/s390/include/asm/kvm_pci.h
index ed596880fb06..e27dbede723c 100644
--- a/arch/s390/include/asm/kvm_pci.h
+++ b/arch/s390/include/asm/kvm_pci.h
@@ -30,6 +30,8 @@ struct kvm_zdev {
 int kvm_s390_pci_dev_open(struct zpci_dev *zdev);
 void kvm_s390_pci_dev_release(struct zpci_dev *zdev);
 
+u8 kvm_s390_pci_get_dtsm(struct zpci_dev *zdev);
+
 int zpci_iommu_attach_kvm(struct zpci_dev *zdev, struct kvm *kvm);
 int zpci_iommu_kvm_assign_iota(struct zpci_dev *zdev, u64 iota);
 int zpci_iommu_kvm_remove_iota(struct zpci_dev *zdev);
diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
index 2287c1c6a3e5..1a8b82220b29 100644
--- a/arch/s390/kvm/pci.c
+++ b/arch/s390/kvm/pci.c
@@ -367,6 +367,28 @@ static int kvm_s390_pci_aif_disable(struct zpci_dev *zdev, bool force)
return rc;
 }
 
+static int kvm_s390_pci_ioat_enable(struct zpci_dev *zdev, u64 iota)
+{
+   if (IS_ENABLED(CONFIG_S390_KVM_IOMMU))
+   return zpci_iommu_kvm_assign_iota(zdev, iota);
+   else
+   return -EINVAL;
+}
+
+static int kvm_s390_pci_ioat_disable(struct zpci_dev *zdev)
+{
+   if (IS_ENABLED(CONFIG_S390_KVM_IOMMU))
+   return zpci_iommu_kvm_remove_iota(zdev);
+   else
+   return -EINVAL;
+}
+
+u8 kvm_s390_pci_get_dtsm(struct zpci_dev *zdev)
+{
+   return (zdev->dtsm & KVM_S390_PCI_DTSM_MASK);
+}
+EXPORT_SYMBOL_GPL(kvm_s390_pci_get_dtsm);
+
 static int kvm_s390_pci_interp_enable(struct zpci_dev *zdev)
 {
u32 gisa;
@@ -432,6 +454,9 @@ static int kvm_s390_pci_interp_disable(struct zpci_dev *zdev, bool force)
if (zdev->kzdev->fib.fmt0.aibv != 0)
kvm_s390_pci_aif_disable(zdev, force);
 
+   /* If we are using the IOAT assist, disable it now */
+   kvm_s390_pci_ioat_disable(zdev);
+
/* Remove the host CLP guest designation */
zdev->gisa = 0;
 
diff --git a/arch/s390/kvm/pci.h b/arch/s390/kvm/pci.h
index a95d9fdc91be..867f04cae3a1 100644
--- a/arch/s390/kvm/pci.h
+++ b/arch/s390/kvm/pci.h
@@ -16,6 +16,8 @@
 #include 
 #include 
 
+#define KVM_S390_PCI_DTSM_MASK 0x40
+
 struct zpci_gaite {
u32 gisa;
u8 gisc;
-- 
2.27.0



[PATCH v4 24/32] KVM: s390: pci: provide routines for enabling/disabling interrupt forwarding

2022-03-14 Thread Matthew Rosato
These routines will be wired into a kvm ioctl in order to respond to
requests to enable / disable a device for Adapter Event Notifications /
Adapter Interruption Forwarding.

Signed-off-by: Matthew Rosato 
---
 arch/s390/include/asm/kvm_pci.h |   2 +
 arch/s390/kvm/pci.c | 201 +++-
 arch/s390/pci/pci_insn.c|   1 +
 3 files changed, 203 insertions(+), 1 deletion(-)

diff --git a/arch/s390/include/asm/kvm_pci.h b/arch/s390/include/asm/kvm_pci.h
index 47ce18b5bddd..ed596880fb06 100644
--- a/arch/s390/include/asm/kvm_pci.h
+++ b/arch/s390/include/asm/kvm_pci.h
@@ -16,11 +16,13 @@
 #include 
 #include 
 #include 
+#include 
 
 struct kvm_zdev {
struct zpci_dev *zdev;
struct kvm *kvm;
struct iommu_domain *dom; /* Used to invoke IOMMU API for RPCIT */
+   struct zpci_fib fib;
struct notifier_block nb;
struct list_head entry;
 };
diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
index df50dd6114c3..2287c1c6a3e5 100644
--- a/arch/s390/kvm/pci.c
+++ b/arch/s390/kvm/pci.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include "pci.h"
 #include "kvm-s390.h"
@@ -172,6 +173,200 @@ int kvm_s390_pci_aen_init(u8 nisc)
return rc;
 }
 
+/* Modify PCI: Register floating adapter interruption forwarding */
+static int kvm_zpci_set_airq(struct zpci_dev *zdev)
+{
+   u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
+   struct zpci_fib fib = {};
+   u8 status;
+
+   fib.fmt0.isc = zdev->kzdev->fib.fmt0.isc;
+   fib.fmt0.sum = 1;   /* enable summary notifications */
+   fib.fmt0.noi = airq_iv_end(zdev->aibv);
+   fib.fmt0.aibv = virt_to_phys(zdev->aibv->vector);
+   fib.fmt0.aibvo = 0;
+   fib.fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
+   fib.fmt0.aisbo = zdev->aisb & 63;
+   fib.gd = zdev->gisa;
+
+   return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
+}
+
+/* Modify PCI: Unregister floating adapter interruption forwarding */
+static int kvm_zpci_clear_airq(struct zpci_dev *zdev)
+{
+   u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT);
+   struct zpci_fib fib = {};
+   u8 cc, status;
+
+   fib.gd = zdev->gisa;
+
+   cc = zpci_mod_fc(req, &fib, &status);
+   if (cc == 3 || (cc == 1 && status == 24))
+   /* Function already gone or IRQs already deregistered. */
+   cc = 0;
+
+   return cc ? -EIO : 0;
+}
+
+static int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib,
+  bool assist)
+{
+   struct page *aibv_page, *aisb_page = NULL;
+   unsigned int msi_vecs, idx;
+   struct zpci_gaite *gaite;
+   unsigned long bit;
+   struct kvm *kvm;
+   phys_addr_t gaddr;
+   int rc = 0, gisc;
+
+   /*
+* Interrupt forwarding is only applicable if the device is already
+* enabled for interpretation
+*/
+   if (zdev->gisa == 0)
+   return -EINVAL;
+
+   kvm = zdev->kzdev->kvm;
+   msi_vecs = min_t(unsigned int, fib->fmt0.noi, zdev->max_msi);
+
+   /* Get the associated forwarding ISC - if invalid, return the error */
+   gisc = kvm_s390_gisc_register(kvm, fib->fmt0.isc);
+   if (gisc < 0)
+   return gisc;
+
+   /* Replace AIBV address */
+   idx = srcu_read_lock(&kvm->srcu);
+   aibv_page = gfn_to_page(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aibv));
+   srcu_read_unlock(&kvm->srcu, idx);
+   if (is_error_page(aibv_page)) {
+   rc = -EIO;
+   goto out;
+   }
+   gaddr = page_to_phys(aibv_page) + (fib->fmt0.aibv & ~PAGE_MASK);
+   fib->fmt0.aibv = gaddr;
+
+   /* Pin the guest AISB if one was specified */
+   if (fib->fmt0.sum == 1) {
+   idx = srcu_read_lock(&kvm->srcu);
+   aisb_page = gfn_to_page(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aisb));
+   srcu_read_unlock(&kvm->srcu, idx);
+   if (is_error_page(aisb_page)) {
+   rc = -EIO;
+   goto unpin1;
+   }
+   }
+
+   /* AISB must be allocated before we can fill in GAITE */
+   mutex_lock(&aift->aift_lock);
+   bit = airq_iv_alloc_bit(aift->sbv);
+   if (bit == -1UL)
+   goto unpin2;
+   zdev->aisb = bit; /* store the summary bit number */
+   zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA |
+   AIRQ_IV_BITLOCK |
+   AIRQ_IV_GUESTVEC,
+   phys_to_virt(fib->fmt0.aibv));
+
+   spin_lock_irq(&aift->gait_lock);
+   gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
+  sizeof(struct zpci_gaite));
+
+   /* If assist not requested, host will get all alerts */
+   if (assist)
+   gaite->gisa = (u32)virt_to_ph

[PATCH v4 23/32] KVM: s390: pci: provide routines for enabling/disabling interpretation

2022-03-14 Thread Matthew Rosato
These routines will be wired into a kvm ioctl in order to respond to
requests to enable / disable a device for zPCI Load/Store interpretation.

The first time such a request is received, enable the necessary facilities
for the guest.

Signed-off-by: Matthew Rosato 
---
 arch/s390/kvm/pci.c | 86 +
 arch/s390/pci/pci.c |  3 ++
 2 files changed, 89 insertions(+)

diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
index 28fe95f13c33..df50dd6114c3 100644
--- a/arch/s390/kvm/pci.c
+++ b/arch/s390/kvm/pci.c
@@ -13,7 +13,9 @@
 #include 
 #include 
 #include 
+#include 
 #include "pci.h"
+#include "kvm-s390.h"
 
 struct zpci_aift *aift;
 
@@ -170,6 +172,87 @@ int kvm_s390_pci_aen_init(u8 nisc)
return rc;
 }
 
+static int kvm_s390_pci_interp_enable(struct zpci_dev *zdev)
+{
+   u32 gisa;
+   int rc;
+
+   if (!zdev->kzdev || !zdev->kzdev->kvm)
+   return -EINVAL;
+
+   /*
+* If this is the first request to use an interpreted device, make the
+* necessary vcpu changes
+*/
+   if (!zdev->kzdev->kvm->arch.use_zpci_interp)
+   kvm_s390_vcpu_pci_enable_interp(zdev->kzdev->kvm);
+
+   /*
+* In the event of a system reset in userspace, the GISA designation
+* may still be assigned because the device is still enabled.
+* Verify it's the same guest before proceeding.
+*/
+   gisa = (u32)virt_to_phys(&zdev->kzdev->kvm->arch.sie_page2->gisa);
+   if (zdev->gisa != 0 && zdev->gisa != gisa)
+   return -EPERM;
+
+   if (zdev_enabled(zdev)) {
+   zdev->gisa = 0;
+   rc = zpci_disable_device(zdev);
+   if (rc)
+   return rc;
+   }
+
+   /*
+* Store information about the identity of the kvm guest allowed to
+* access this device via interpretation to be used by host CLP
+*/
+   zdev->gisa = gisa;
+
+   rc = zpci_enable_device(zdev);
+   if (rc)
+   goto err;
+
+   /* Re-register the IOMMU that was already created */
+   rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
+   virt_to_phys(zdev->dma_table));
+   if (rc)
+   goto err;
+
+   return rc;
+
+err:
+   zdev->gisa = 0;
+   return rc;
+}
+
+static int kvm_s390_pci_interp_disable(struct zpci_dev *zdev)
+{
+   int rc;
+
+   if (zdev->gisa == 0)
+   return -EINVAL;
+
+   /* Remove the host CLP guest designation */
+   zdev->gisa = 0;
+
+   if (zdev_enabled(zdev)) {
+   rc = zpci_disable_device(zdev);
+   if (rc)
+   return rc;
+   }
+
+   rc = zpci_enable_device(zdev);
+   if (rc)
+   return rc;
+
+   /* Re-register the IOMMU that was already created */
+   rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
+   virt_to_phys(zdev->dma_table));
+
+   return rc;
+}
+
 static int kvm_s390_pci_group_notifier(struct notifier_block *nb,
   unsigned long action, void *data)
 {
@@ -203,6 +286,9 @@ void kvm_s390_pci_dev_release(struct zpci_dev *zdev)
 {
struct kvm_zdev *kzdev;
 
+   if (zdev->gisa != 0)
+   kvm_s390_pci_interp_disable(zdev);
+
kzdev = zdev->kzdev;
WARN_ON(kzdev->zdev != zdev);
zdev->kzdev = 0;
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 13033717cd4e..5dbe49ec325e 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -147,6 +147,7 @@ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
	zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status);
return cc;
 }
+EXPORT_SYMBOL_GPL(zpci_register_ioat);
 
 /* Modify PCI: Unregister I/O address translation parameters */
 int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
@@ -727,6 +728,7 @@ int zpci_enable_device(struct zpci_dev *zdev)
zpci_update_fh(zdev, fh);
return rc;
 }
+EXPORT_SYMBOL_GPL(zpci_enable_device);
 
 int zpci_disable_device(struct zpci_dev *zdev)
 {
@@ -750,6 +752,7 @@ int zpci_disable_device(struct zpci_dev *zdev)
}
return rc;
 }
+EXPORT_SYMBOL_GPL(zpci_disable_device);
 
 /**
  * zpci_hot_reset_device - perform a reset of the given zPCI function
-- 
2.27.0



[PATCH v4 22/32] KVM: s390: pci: routines for (dis)associating zPCI devices with a KVM

2022-03-14 Thread Matthew Rosato
These routines will be wired into a KVM ioctl, to be issued from
userspace to (dis)associate a specific zPCI device with the issuing
KVM.  This will create/delete a relationship between KVM, zPCI device
and the associated IOMMU domain for the device.

Signed-off-by: Matthew Rosato 
---
 arch/s390/include/asm/kvm_host.h |   2 +
 arch/s390/include/asm/kvm_pci.h  |   2 +
 arch/s390/kvm/kvm-s390.c |   5 +
 arch/s390/kvm/pci.c  | 225 +++
 arch/s390/kvm/pci.h  |   5 +
 5 files changed, 239 insertions(+)

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index bf61ab05f98c..bd171abbb8ef 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -965,6 +965,8 @@ struct kvm_arch{
DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS);
struct kvm_s390_gisa_interrupt gisa_int;
struct kvm_s390_pv pv;
+   struct list_head kzdev_list;
+   spinlock_t kzdev_list_lock;
 };
 
 #define KVM_HVA_ERR_BAD(-1UL)
diff --git a/arch/s390/include/asm/kvm_pci.h b/arch/s390/include/asm/kvm_pci.h
index ebc0da5d9ac1..47ce18b5bddd 100644
--- a/arch/s390/include/asm/kvm_pci.h
+++ b/arch/s390/include/asm/kvm_pci.h
@@ -21,6 +21,8 @@ struct kvm_zdev {
struct zpci_dev *zdev;
struct kvm *kvm;
struct iommu_domain *dom; /* Used to invoke IOMMU API for RPCIT */
+   struct notifier_block nb;
+   struct list_head entry;
 };
 
 int kvm_s390_pci_dev_open(struct zpci_dev *zdev);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index d91b2547f0bf..84acaf59a7d3 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2775,6 +2775,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
kvm_s390_crypto_init(kvm);
 
+   if (IS_ENABLED(CONFIG_VFIO_PCI))
+   kvm_s390_pci_init_list(kvm);
+
mutex_init(&kvm->arch.float_int.ais_lock);
spin_lock_init(&kvm->arch.float_int.lock);
for (i = 0; i < FIRQ_LIST_COUNT; i++)
@@ -2860,6 +2863,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
if (!kvm_is_ucontrol(kvm))
gmap_remove(kvm->arch.gmap);
kvm_s390_destroy_adapters(kvm);
+   if (IS_ENABLED(CONFIG_VFIO_PCI))
+   kvm_s390_pci_clear_list(kvm);
kvm_s390_clear_float_irqs(kvm);
kvm_s390_vsie_destroy(kvm);
KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
index 1c42d25de697..28fe95f13c33 100644
--- a/arch/s390/kvm/pci.c
+++ b/arch/s390/kvm/pci.c
@@ -9,6 +9,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -23,6 +24,22 @@ static inline int __set_irq_noiib(u16 ctl, u8 isc)
return zpci_set_irq_ctrl(ctl, isc, &iib);
 }
 
+static struct kvm_zdev *get_kzdev_by_fh(struct kvm *kvm, u32 fh)
+{
+   struct kvm_zdev *kzdev, *retval = NULL;
+
+   spin_lock(&kvm->arch.kzdev_list_lock);
+   list_for_each_entry(kzdev, &kvm->arch.kzdev_list, entry) {
+   if (kzdev->zdev->fh == fh) {
+   retval = kzdev;
+   break;
+   }
+   }
+   spin_unlock(&kvm->arch.kzdev_list_lock);
+
+   return retval;
+}
+
 /* Caller must hold the aift lock before calling this function */
 void kvm_s390_pci_aen_exit(void)
 {
@@ -153,6 +170,20 @@ int kvm_s390_pci_aen_init(u8 nisc)
return rc;
 }
 
+static int kvm_s390_pci_group_notifier(struct notifier_block *nb,
+  unsigned long action, void *data)
+{
+   struct kvm_zdev *kzdev = container_of(nb, struct kvm_zdev, nb);
+
+   if (action == VFIO_GROUP_NOTIFY_SET_KVM) {
+   if (!data || !kzdev->zdev)
+   return NOTIFY_DONE;
+   kzdev->kvm = data;
+   }
+
+   return NOTIFY_OK;
+}
+
 int kvm_s390_pci_dev_open(struct zpci_dev *zdev)
 {
struct kvm_zdev *kzdev;
@@ -179,6 +210,200 @@ void kvm_s390_pci_dev_release(struct zpci_dev *zdev)
 }
 EXPORT_SYMBOL_GPL(kvm_s390_pci_dev_release);
 
+static struct vfio_device *get_vdev(struct device *dev)
+{
+   struct vfio_device *(*fn)(struct device *dev);
+   struct vfio_device *vdev;
+
+   fn = symbol_get(vfio_device_get_from_dev);
+   if (!fn)
+   return NULL;
+
+   vdev = fn(dev);
+
+   symbol_put(vfio_device_get_from_dev);
+
+   return vdev;
+}
+
+static void put_vdev(struct vfio_device *vdev)
+{
+   void (*fn)(struct vfio_device *vdev);
+
+   fn = symbol_get(vfio_device_put);
+   if (!fn)
+   return;
+
+   fn(vdev);
+
+   symbol_put(vfio_device_put);
+}
+
+static int register_notifier(struct device *dev, struct notifier_block *nb)
+{
+   int (*fn)(struct device *dev, enum vfio_notify_type type,
+ unsigned long *events, struct notifier_block *nb);
+   unsigned long events = VFIO_GROUP_NOTIFY_SET_KVM;
+   int rc;
+
+   fn = 

[PATCH v4 21/32] KVM: s390: mechanism to enable guest zPCI Interpretation

2022-03-14 Thread Matthew Rosato
The guest must have access to certain facilities in order to allow
interpretive execution of zPCI instructions and adapter event
notifications.  However, there are some cases where a guest might
disable interpretation -- provide a mechanism via which we can defer
enabling the associated zPCI interpretation facilities until the guest
indicates it wishes to use them.

Acked-by: Pierre Morel 
Signed-off-by: Matthew Rosato 
---
 arch/s390/include/asm/kvm_host.h |  4 
 arch/s390/kvm/kvm-s390.c | 41 
 arch/s390/kvm/kvm-s390.h | 10 
 3 files changed, 55 insertions(+)

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index b468d3a2215e..bf61ab05f98c 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -252,7 +252,10 @@ struct kvm_s390_sie_block {
 #define ECB2_IEP   0x20
 #define ECB2_PFMFI 0x08
 #define ECB2_ESCA  0x04
+#define ECB2_ZPCI_LSI  0x02
__u8ecb2;   /* 0x0062 */
+#define ECB3_AISI  0x20
+#define ECB3_AISII 0x10
 #define ECB3_DEA 0x08
 #define ECB3_AES 0x04
 #define ECB3_RI  0x01
@@ -938,6 +941,7 @@ struct kvm_arch{
int use_cmma;
int use_pfmfi;
int use_skf;
+   int use_zpci_interp;
int user_cpu_state_ctrl;
int user_sigp;
int user_stsi;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 32e75f6f4e4d..d91b2547f0bf 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1029,6 +1029,45 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
return 0;
 }
 
+static void kvm_s390_vcpu_pci_setup(struct kvm_vcpu *vcpu)
+{
+   /* Only set the ECB bits after guest requests zPCI interpretation */
+   if (!vcpu->kvm->arch.use_zpci_interp)
+   return;
+
+   vcpu->arch.sie_block->ecb2 |= ECB2_ZPCI_LSI;
+   vcpu->arch.sie_block->ecb3 |= ECB3_AISII + ECB3_AISI;
+}
+
+void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm)
+{
+   struct kvm_vcpu *vcpu;
+   unsigned long i;
+
+   /*
+* If host is configured for PCI and the necessary facilities are
+* available, turn on interpretation for the life of this guest
+*/
+   if (!sclp.has_zpci_lsi || !sclp.has_aisii || !sclp.has_aeni ||
+   !sclp.has_aisi || !IS_ENABLED(CONFIG_VFIO_PCI) ||
+   !IS_ENABLED(CONFIG_S390_KVM_IOMMU))
+   return;
+
+   mutex_lock(&kvm->lock);
+
+   kvm->arch.use_zpci_interp = 1;
+
+   kvm_s390_vcpu_block_all(kvm);
+
+   kvm_for_each_vcpu(i, vcpu, kvm) {
+   kvm_s390_vcpu_pci_setup(vcpu);
+   kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
+   }
+
+   kvm_s390_vcpu_unblock_all(kvm);
+   mutex_unlock(&kvm->lock);
+}
+
 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
 {
unsigned long cx;
@@ -3236,6 +3275,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
 
kvm_s390_vcpu_crypto_setup(vcpu);
 
+   kvm_s390_vcpu_pci_setup(vcpu);
+
mutex_lock(&vcpu->kvm->lock);
if (kvm_s390_pv_is_protected(vcpu->kvm)) {
rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 098831e815e6..14bb2539f837 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -496,6 +496,16 @@ void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
  */
 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm);
 
+/**
+ * kvm_s390_vcpu_pci_enable_interp
+ *
+ * Set the associated PCI attributes for each vcpu to allow for zPCI Load/Store
+ * interpretation as well as adapter interruption forwarding.
+ *
+ * @kvm: the KVM guest
+ */
+void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm);
+
 /**
  * diag9c_forwarding_hz
  *
-- 
2.27.0



[PATCH v4 20/32] KVM: s390: pci: enable host forwarding of Adapter Event Notifications

2022-03-14 Thread Matthew Rosato
In cases where interrupts are not forwarded to the guest via firmware,
KVM is responsible for ensuring delivery.  When an interrupt presents
with the forwarding bit, we must process the forwarding tables until
all interrupts are delivered.

Signed-off-by: Matthew Rosato 
---
 arch/s390/include/asm/kvm_host.h |  1 +
 arch/s390/include/asm/tpi.h  | 13 ++
 arch/s390/kvm/interrupt.c| 77 +++-
 arch/s390/kvm/kvm-s390.c |  3 +-
 arch/s390/kvm/pci.h  | 10 +
 5 files changed, 102 insertions(+), 2 deletions(-)

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index a22c9266ea05..b468d3a2215e 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -757,6 +757,7 @@ struct kvm_vm_stat {
u64 inject_pfault_done;
u64 inject_service_signal;
u64 inject_virtio;
+   u64 aen_forward;
 };
 
 struct kvm_arch_memory_slot {
diff --git a/arch/s390/include/asm/tpi.h b/arch/s390/include/asm/tpi.h
index 1ac538b8cbf5..f76e5fdff23a 100644
--- a/arch/s390/include/asm/tpi.h
+++ b/arch/s390/include/asm/tpi.h
@@ -19,6 +19,19 @@ struct tpi_info {
u32 :12;
 } __packed __aligned(4);
 
+/* I/O-Interruption Code as stored by TPI for an Adapter I/O */
+struct tpi_adapter_info {
+   u32 aism:8;
+   u32 :22;
+   u32 error:1;
+   u32 forward:1;
+   u32 reserved;
+   u32 adapter_IO:1;
+   u32 directed_irq:1;
+   u32 isc:3;
+   u32 :27;
+} __packed __aligned(4);
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_S390_TPI_H */
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 17c7deb516d2..513b393d5d0d 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -3271,11 +3271,86 @@ int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc)
 }
 EXPORT_SYMBOL_GPL(kvm_s390_gisc_unregister);
 
+static void aen_host_forward(unsigned long si)
+{
+   struct kvm_s390_gisa_interrupt *gi;
+   struct zpci_gaite *gaite;
+   struct kvm *kvm;
+
+   gaite = (struct zpci_gaite *)aift->gait +
+   (si * sizeof(struct zpci_gaite));
+   if (gaite->count == 0)
+   return;
+   if (gaite->aisb != 0)
+   set_bit_inv(gaite->aisbo, (unsigned long *)gaite->aisb);
+
+   kvm = kvm_s390_pci_si_to_kvm(aift, si);
+   if (!kvm)
+   return;
+   gi = &kvm->arch.gisa_int;
+
+   if (!(gi->origin->g1.simm & AIS_MODE_MASK(gaite->gisc)) ||
+   !(gi->origin->g1.nimm & AIS_MODE_MASK(gaite->gisc))) {
+   gisa_set_ipm_gisc(gi->origin, gaite->gisc);
+   if (hrtimer_active(&gi->timer))
+   hrtimer_cancel(&gi->timer);
+   hrtimer_start(&gi->timer, 0, HRTIMER_MODE_REL);
+   kvm->stat.aen_forward++;
+   }
+}
+
+static void aen_process_gait(u8 isc)
+{
+   bool found = false, first = true;
+   union zpci_sic_iib iib = {{0}};
+   unsigned long si, flags;
+
+   spin_lock_irqsave(&aift->gait_lock, flags);
+
+   if (!aift->gait) {
+   spin_unlock_irqrestore(&aift->gait_lock, flags);
+   return;
+   }
+
+   for (si = 0;;) {
+   /* Scan adapter summary indicator bit vector */
+   si = airq_iv_scan(aift->sbv, si, airq_iv_end(aift->sbv));
+   if (si == -1UL) {
+   if (first || found) {
+   /* Re-enable interrupts. */
+   zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, isc,
+ &iib);
+   first = found = false;
+   } else {
+   /* Interrupts on and all bits processed */
+   break;
+   }
+   found = false;
+   si = 0;
+   /* Scan again after re-enabling interrupts */
+   continue;
+   }
+   found = true;
+   aen_host_forward(si);
+   }
+
+   spin_unlock_irqrestore(&aift->gait_lock, flags);
+}
+
 static void gib_alert_irq_handler(struct airq_struct *airq,
  struct tpi_info *tpi_info)
 {
+   struct tpi_adapter_info *info = (struct tpi_adapter_info *)tpi_info;
+
inc_irq_stat(IRQIO_GAL);
-   process_gib_alert_list();
+
+   if (IS_ENABLED(CONFIG_VFIO_PCI) && (info->forward || info->error)) {
+   aen_process_gait(info->isc);
+   if (info->aism != 0)
+   process_gib_alert_list();
+   } else {
+   process_gib_alert_list();
+   }
 }
 
 static struct airq_struct gib_alert_irq = {
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index d89cd16b57dd..32e75f6f4e4d 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -65,7 +65,8 @@ const struct

[PATCH v4 19/32] KVM: s390: pci: do initial setup for AEN interpretation

2022-03-14 Thread Matthew Rosato
Initial setup for Adapter Event Notification Interpretation for zPCI
passthrough devices.  Specifically, allocate a structure for forwarding of
adapter events and pass the address of this structure to firmware.

Signed-off-by: Matthew Rosato 
---
 arch/s390/include/asm/pci.h  |   4 +
 arch/s390/include/asm/pci_insn.h |  12 +++
 arch/s390/kvm/interrupt.c|  14 +++
 arch/s390/kvm/kvm-s390.c |   9 ++
 arch/s390/kvm/pci.c  | 154 +++
 arch/s390/kvm/pci.h  |  42 +
 arch/s390/pci/pci.c  |   6 ++
 7 files changed, 241 insertions(+)
 create mode 100644 arch/s390/kvm/pci.h

diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 4faff673078b..1ae49330d1c8 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -9,6 +9,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #define PCIBIOS_MIN_IO 0x1000
@@ -204,6 +205,9 @@ extern const struct attribute_group *zpci_attr_groups[];
 extern unsigned int s390_pci_force_floating __initdata;
 extern unsigned int s390_pci_no_rid;
 
+extern union zpci_sic_iib *zpci_aipb;
+extern struct airq_iv *zpci_aif_sbv;
+
/* -----------------------------------------------------------------------------
  Prototypes
----------------------------------------------------------------------------- */
diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h
index 32759c407b8f..ad9000295c82 100644
--- a/arch/s390/include/asm/pci_insn.h
+++ b/arch/s390/include/asm/pci_insn.h
@@ -101,6 +101,7 @@ struct zpci_fib {
 /* Set Interruption Controls Operation Controls  */
 #defineSIC_IRQ_MODE_ALL0
 #defineSIC_IRQ_MODE_SINGLE 1
+#defineSIC_SET_AENI_CONTROLS   2
 #defineSIC_IRQ_MODE_DIRECT 4
 #defineSIC_IRQ_MODE_D_ALL  16
 #defineSIC_IRQ_MODE_D_SINGLE   17
@@ -127,9 +128,20 @@ struct zpci_cdiib {
u64 : 64;
 } __packed __aligned(8);
 
+/* adapter interruption parameters block */
+struct zpci_aipb {
+   u64 faisb;
+   u64 gait;
+   u16 : 13;
+   u16 afi : 3;
+   u32 : 32;
+   u16 faal;
+} __packed __aligned(8);
+
 union zpci_sic_iib {
struct zpci_diib diib;
struct zpci_cdiib cdiib;
+   struct zpci_aipb aipb;
 };
 
 DECLARE_STATIC_KEY_FALSE(have_mio);
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 65e75ca2fc5d..17c7deb516d2 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -32,6 +32,7 @@
 #include "kvm-s390.h"
 #include "gaccess.h"
 #include "trace-s390.h"
+#include "pci.h"
 
 #define PFAULT_INIT 0x0600
 #define PFAULT_DONE 0x0680
@@ -3286,6 +3287,11 @@ void kvm_s390_gib_destroy(void)
 {
if (!gib)
return;
+   if (IS_ENABLED(CONFIG_VFIO_PCI) && sclp.has_aeni && aift) {
+   mutex_lock(&aift->aift_lock);
+   kvm_s390_pci_aen_exit();
+   mutex_unlock(&aift->aift_lock);
+   }
chsc_sgib(0);
unregister_adapter_interrupt(&gib_alert_irq);
free_page((unsigned long)gib);
@@ -3323,6 +3329,14 @@ int kvm_s390_gib_init(u8 nisc)
goto out_unreg_gal;
}
 
+   if (IS_ENABLED(CONFIG_VFIO_PCI) && sclp.has_aeni) {
+   if (kvm_s390_pci_aen_init(nisc)) {
+   pr_err("Initializing AEN for PCI failed\n");
+   rc = -EIO;
+   goto out_unreg_gal;
+   }
+   }
+
KVM_EVENT(3, "gib 0x%pK (nisc=%d) initialized", gib, gib->nisc);
goto out;
 
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 2296b1ff1e02..d89cd16b57dd 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -48,6 +48,7 @@
 #include 
 #include "kvm-s390.h"
 #include "gaccess.h"
+#include "pci.h"
 
 #define CREATE_TRACE_POINTS
 #include "trace.h"
@@ -503,6 +504,14 @@ int kvm_arch_init(void *opaque)
goto out;
}
 
+   if (IS_ENABLED(CONFIG_VFIO_PCI)) {
+   rc = kvm_s390_pci_init();
+   if (rc) {
+   pr_err("Unable to allocate AIFT for PCI\n");
+   goto out;
+   }
+   }
+
rc = kvm_s390_gib_init(GAL_ISC);
if (rc)
goto out;
diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
index 612faf87126d..1c42d25de697 100644
--- a/arch/s390/kvm/pci.c
+++ b/arch/s390/kvm/pci.c
@@ -10,6 +10,148 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include "pci.h"
+
+struct zpci_aift *aift;
+
+static inline int __set_irq_noiib(u16 ctl, u8 isc)
+{
+   union zpci_sic_iib iib = {{0}};
+
+   return zpci_set_irq_ctrl(ctl, isc, &iib);
+}
+
+/* Caller must hold the aift lock before calling this function */
+void kvm_s390_pci_aen_exit(void)
+{
+   unsigned long flags;
+   struct kvm_zdev **gait_kzdev;
+

[PATCH v4 18/32] iommu/s390: add support for IOMMU_DOMAIN_KVM

2022-03-14 Thread Matthew Rosato
Add an alternate domain ops for type IOMMU_DOMAIN_KVM.  This type is
intended for use when KVM is managing the IOMMU domain on behalf of a
VM.  Mapping can only be performed once a KVM is registered with the
domain as well as a guest IOTA (address translation anchor).

The map operation is expected to be received in response to an
04 intercept of a guest RPCIT instruction, and will perform a
synchronization operation between the host DMA and guest DMA tables
over the range specified.

Signed-off-by: Matthew Rosato 
---
 arch/s390/include/asm/kvm_pci.h |   6 +
 arch/s390/include/asm/pci_dma.h |   3 +
 drivers/iommu/Kconfig   |   8 +
 drivers/iommu/Makefile  |   1 +
 drivers/iommu/s390-iommu.c  |  49 ++--
 drivers/iommu/s390-iommu.h  |  53 
 drivers/iommu/s390-kvm-iommu.c  | 469 
 7 files changed, 562 insertions(+), 27 deletions(-)
 create mode 100644 drivers/iommu/s390-iommu.h
 create mode 100644 drivers/iommu/s390-kvm-iommu.c

diff --git a/arch/s390/include/asm/kvm_pci.h b/arch/s390/include/asm/kvm_pci.h
index ae8669105f72..ebc0da5d9ac1 100644
--- a/arch/s390/include/asm/kvm_pci.h
+++ b/arch/s390/include/asm/kvm_pci.h
@@ -11,6 +11,7 @@
 #define ASM_KVM_PCI_H
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -19,9 +20,14 @@
 struct kvm_zdev {
struct zpci_dev *zdev;
struct kvm *kvm;
+   struct iommu_domain *dom; /* Used to invoke IOMMU API for RPCIT */
 };
 
 int kvm_s390_pci_dev_open(struct zpci_dev *zdev);
 void kvm_s390_pci_dev_release(struct zpci_dev *zdev);
 
+int zpci_iommu_attach_kvm(struct zpci_dev *zdev, struct kvm *kvm);
+int zpci_iommu_kvm_assign_iota(struct zpci_dev *zdev, u64 iota);
+int zpci_iommu_kvm_remove_iota(struct zpci_dev *zdev);
+
 #endif /* ASM_KVM_PCI_H */
diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
index 91e63426bdc5..38004e0a4383 100644
--- a/arch/s390/include/asm/pci_dma.h
+++ b/arch/s390/include/asm/pci_dma.h
@@ -50,6 +50,9 @@ enum zpci_ioat_dtype {
 #define ZPCI_TABLE_ALIGN   ZPCI_TABLE_SIZE
 #define ZPCI_TABLE_ENTRY_SIZE  (sizeof(unsigned long))
#define ZPCI_TABLE_ENTRIES (ZPCI_TABLE_SIZE / ZPCI_TABLE_ENTRY_SIZE)
+#define ZPCI_TABLE_PAGES   (ZPCI_TABLE_SIZE >> PAGE_SHIFT)
+#define ZPCI_TABLE_ENTRIES_PAGES   (ZPCI_TABLE_ENTRIES * ZPCI_TABLE_PAGES)
+#define ZPCI_TABLE_ENTRIES_PER_PAGE(ZPCI_TABLE_ENTRIES / ZPCI_TABLE_PAGES)
 
 #define ZPCI_TABLE_BITS11
 #define ZPCI_PT_BITS   8
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 3eb68fa1b8cc..9637f73925ec 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -411,6 +411,14 @@ config S390_AP_IOMMU
  Enables bits of IOMMU API required by VFIO. The iommu_ops
  is not implemented as it is not necessary for VFIO.
 
+config S390_KVM_IOMMU
+   bool "S390 KVM IOMMU Support"
+   depends on S390_IOMMU && KVM || COMPILE_TEST
+   select IOMMU_API
+   help
+ Extends the S390 IOMMU API to support a domain owned and managed by
+ KVM. This allows KVM, rather than userspace, to manage the nested mappings.
+
 config MTK_IOMMU
tristate "MediaTek IOMMU Support"
depends on ARCH_MEDIATEK || COMPILE_TEST
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index bc7f730edbb0..5476e978d7f5 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o
 obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
 obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
 obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
+obj-$(CONFIG_S390_KVM_IOMMU) += s390-kvm-iommu.o
 obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o
 obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
 obj-$(CONFIG_IOMMU_SVA_LIB) += iommu-sva-lib.o io-pgfault.o
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 73a85c599dc2..0ead37f6e232 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -11,6 +11,7 @@
 #include 
 #include 
 #include 
+#include "s390-iommu.h"
 
 /*
  * Physically contiguous memory regions can be mapped with 4 KiB alignment,
@@ -21,24 +22,6 @@
 
 static const struct iommu_ops s390_iommu_ops;
 
-struct s390_domain {
-   struct iommu_domain domain;
-   struct list_headdevices;
-   unsigned long   *dma_table;
-   spinlock_t  dma_table_lock;
-   spinlock_t  list_lock;
-};
-
-struct s390_domain_device {
-   struct list_headlist;
-   struct zpci_dev *zdev;
-};
-
-static struct s390_domain *to_s390_domain(struct iommu_domain *dom)
-{
-   return container_of(dom, struct s390_domain, domain);
-}
-
 static bool s390_iommu_capable(enum iommu_cap cap)
 {
switch (cap) {
@@ -55,7 +38,12 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type)
 {
struct s390_domain *s390_domain;
 
-   if (d
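
A hedged sketch of the expected setup order for the KVM-owned domain,
based only on the interfaces declared in kvm_pci.h above; the helper
itself is hypothetical:

/* Hypothetical caller-side flow; not part of this series' code */
static int kvm_owned_ioat_setup(struct zpci_dev *zdev, struct kvm *kvm,
				u64 guest_iota)
{
	int rc;

	/* Register the KVM with the device's KVM-managed domain first */
	rc = zpci_iommu_attach_kvm(zdev, kvm);
	if (rc)
		return rc;

	/*
	 * Then anchor the guest IOTA; mapping may only be performed once
	 * both registrations are in place.
	 */
	return zpci_iommu_kvm_assign_iota(zdev, guest_iota);
}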

[PATCH v4 17/32] KVM: s390: pci: add basic kvm_zdev structure

2022-03-14 Thread Matthew Rosato
This structure will be used to carry kvm passthrough information related to
zPCI devices.

Signed-off-by: Matthew Rosato 
---
 arch/s390/include/asm/kvm_pci.h | 27 +++
 arch/s390/include/asm/pci.h |  3 +++
 arch/s390/kvm/Makefile  |  1 +
 arch/s390/kvm/pci.c | 38 +
 4 files changed, 69 insertions(+)
 create mode 100644 arch/s390/include/asm/kvm_pci.h
 create mode 100644 arch/s390/kvm/pci.c

diff --git a/arch/s390/include/asm/kvm_pci.h b/arch/s390/include/asm/kvm_pci.h
new file mode 100644
index ..ae8669105f72
--- /dev/null
+++ b/arch/s390/include/asm/kvm_pci.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * KVM PCI Passthrough for virtual machines on s390
+ *
+ * Copyright IBM Corp. 2022
+ *
+ *Author(s): Matthew Rosato 
+ */
+
+#ifndef ASM_KVM_PCI_H
+#define ASM_KVM_PCI_H
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+struct kvm_zdev {
+   struct zpci_dev *zdev;
+   struct kvm *kvm;
+};
+
+int kvm_s390_pci_dev_open(struct zpci_dev *zdev);
+void kvm_s390_pci_dev_release(struct zpci_dev *zdev);
+
+#endif /* ASM_KVM_PCI_H */
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index e8a3fd5bc169..4faff673078b 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -97,6 +97,7 @@ struct zpci_bar_struct {
 };
 
 struct s390_domain;
+struct kvm_zdev;
 
 #define ZPCI_FUNCTIONS_PER_BUS 256
 struct zpci_bus {
@@ -190,6 +191,8 @@ struct zpci_dev {
struct dentry   *debugfs_dev;
 
struct s390_domain *s390_domain; /* s390 IOMMU domain data */
+
+   struct kvm_zdev *kzdev; /* passthrough data */
 };
 
 static inline bool zdev_enabled(struct zpci_dev *zdev)
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 26f4a74e5ce4..00cf6853d93f 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -10,4 +10,5 @@ ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o
 kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o
 
+kvm-$(CONFIG_PCI) += pci.o
 obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
new file mode 100644
index ..612faf87126d
--- /dev/null
+++ b/arch/s390/kvm/pci.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * s390 kvm PCI passthrough support
+ *
+ * Copyright IBM Corp. 2022
+ *
+ *Author(s): Matthew Rosato 
+ */
+
+#include 
+#include 
+#include 
+
+int kvm_s390_pci_dev_open(struct zpci_dev *zdev)
+{
+   struct kvm_zdev *kzdev;
+
+   kzdev = kzalloc(sizeof(struct kvm_zdev), GFP_KERNEL);
+   if (!kzdev)
+   return -ENOMEM;
+
+   kzdev->zdev = zdev;
+   zdev->kzdev = kzdev;
+
+   return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_s390_pci_dev_open);
+
+void kvm_s390_pci_dev_release(struct zpci_dev *zdev)
+{
+   struct kvm_zdev *kzdev;
+
+   kzdev = zdev->kzdev;
+   WARN_ON(kzdev->zdev != zdev);
+   zdev->kzdev = 0;
+   kfree(kzdev);
+}
+EXPORT_SYMBOL_GPL(kvm_s390_pci_dev_release);
-- 
2.27.0



[PATCH v4 16/32] vfio-pci/zdev: add function handle to clp base capability

2022-03-14 Thread Matthew Rosato
The function handle is a system-wide unique identifier for a zPCI
device.  It is used as input for various zPCI operations.

Signed-off-by: Matthew Rosato 
---
 drivers/vfio/pci/vfio_pci_zdev.c | 5 +++--
 include/uapi/linux/vfio_zdev.h   | 3 +++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci_zdev.c b/drivers/vfio/pci/vfio_pci_zdev.c
index ea4c0d2b0663..4a653ce480c7 100644
--- a/drivers/vfio/pci/vfio_pci_zdev.c
+++ b/drivers/vfio/pci/vfio_pci_zdev.c
@@ -23,14 +23,15 @@ static int zpci_base_cap(struct zpci_dev *zdev, struct vfio_info_cap *caps)
 {
struct vfio_device_info_cap_zpci_base cap = {
.header.id = VFIO_DEVICE_INFO_CAP_ZPCI_BASE,
-   .header.version = 1,
+   .header.version = 2,
.start_dma = zdev->start_dma,
.end_dma = zdev->end_dma,
.pchid = zdev->pchid,
.vfn = zdev->vfn,
.fmb_length = zdev->fmb_length,
.pft = zdev->pft,
-   .gid = zdev->pfgid
+   .gid = zdev->pfgid,
+   .fh = zdev->fh
};
 
return vfio_info_add_capability(caps, &cap.header, sizeof(cap));
diff --git a/include/uapi/linux/vfio_zdev.h b/include/uapi/linux/vfio_zdev.h
index b4309397b6b2..78c022af3d29 100644
--- a/include/uapi/linux/vfio_zdev.h
+++ b/include/uapi/linux/vfio_zdev.h
@@ -29,6 +29,9 @@ struct vfio_device_info_cap_zpci_base {
__u16 fmb_length;   /* Measurement Block Length (in bytes) */
__u8 pft;   /* PCI Function Type */
__u8 gid;   /* PCI function group ID */
+   /* End of version 1 */
+   __u32 fh;   /* PCI function handle */
+   /* End of version 2 */
 };
 
 /**
-- 
2.27.0
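
For illustration, userspace could consume the new field by walking the
device-info capability chain along these lines.  This sketch is not part of
the patch; example_get_fh() and the fixed 4 KiB buffer (assumed large enough
for the capability chain) are assumptions, and the fh field only exists with
this series applied.

#include <sys/ioctl.h>
#include <linux/vfio.h>
#include <linux/vfio_zdev.h>

/* Illustrative sketch: read the function handle from a version-2
 * zPCI base capability, if the kernel reports one. */
static int example_get_fh(int device_fd, __u32 *fh)
{
        char buf[4096] = { 0 };
        struct vfio_device_info *info = (void *)buf;
        struct vfio_info_cap_header *hdr;
        __u32 off;

        info->argsz = sizeof(buf);
        if (ioctl(device_fd, VFIO_DEVICE_GET_INFO, info))
                return -1;
        if (!(info->flags & VFIO_DEVICE_FLAGS_CAPS))
                return -1;

        for (off = info->cap_offset; off; off = hdr->next) {
                hdr = (struct vfio_info_cap_header *)(buf + off);
                if (hdr->id != VFIO_DEVICE_INFO_CAP_ZPCI_BASE)
                        continue;
                if (hdr->version < 2)
                        return -1;      /* kernel predates the fh field */
                *fh = ((struct vfio_device_info_cap_zpci_base *)hdr)->fh;
                return 0;
        }
        return -1;      /* capability not present */
}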



[PATCH v4 15/32] vfio: introduce KVM-owned IOMMU type

2022-03-14 Thread Matthew Rosato
s390x will introduce a new IOMMU domain type where the mappings are
managed by KVM rather than in response to userspace mapping ioctls.  Allow
specifying this type via the VFIO_SET_IOMMU ioctl and trigger the
appropriate IOMMU interface to override the default domain.

Signed-off-by: Matthew Rosato 
---
 drivers/vfio/vfio_iommu_type1.c | 12 +++-
 include/uapi/linux/vfio.h   |  6 ++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 9394aa9444c1..0bec97077d61 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -77,6 +77,7 @@ struct vfio_iommu {
boolnesting;
booldirty_page_tracking;
boolcontainer_open;
+   boolkvm;
struct list_heademulated_iommu_groups;
 };
 
@@ -2203,7 +2204,12 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
goto out_free_group;
 
ret = -EIO;
-   domain->domain = iommu_domain_alloc(bus);
+
+   if (iommu->kvm)
+   domain->domain = iommu_domain_alloc_type(bus, IOMMU_DOMAIN_KVM);
+   else
+   domain->domain = iommu_domain_alloc(bus);
+
if (!domain->domain)
goto out_free_domain;
 
@@ -2552,6 +2558,9 @@ static void *vfio_iommu_type1_open(unsigned long arg)
case VFIO_TYPE1v2_IOMMU:
iommu->v2 = true;
break;
+   case VFIO_KVM_IOMMU:
+   iommu->kvm = true;
+   break;
default:
kfree(iommu);
return ERR_PTR(-EINVAL);
@@ -2637,6 +2646,7 @@ static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu,
case VFIO_TYPE1_NESTING_IOMMU:
case VFIO_UNMAP_ALL:
case VFIO_UPDATE_VADDR:
+   case VFIO_KVM_IOMMU:
return 1;
case VFIO_DMA_CC_IOMMU:
if (!iommu)
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index ef33ea002b0b..666edb6957ac 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -52,6 +52,12 @@
 /* Supports the vaddr flag for DMA map and unmap */
 #define VFIO_UPDATE_VADDR  10
 
+/*
+ * The KVM_IOMMU type implies that the hypervisor will control the mappings
+ * rather than userspace
+ */
+#define VFIO_KVM_IOMMU 11
+
 /*
  * The IOCTL interface is designed for extensibility by embedding the
  * structure length (argsz) and flags into structures passed between
-- 
2.27.0
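
For illustration, a VMM could opt in to the new type on its container as
sketched below.  This is hypothetical usage, not part of the patch: it
assumes the usual VFIO container and group setup has already happened, and
example_set_kvm_iommu() is a made-up name.

#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Illustrative sketch: select the KVM-owned backend when offered. */
static int example_set_kvm_iommu(int container_fd)
{
        if (ioctl(container_fd, VFIO_CHECK_EXTENSION, VFIO_KVM_IOMMU) != 1)
                return -1;      /* kernel lacks the KVM-owned IOMMU type */

        return ioctl(container_fd, VFIO_SET_IOMMU, VFIO_KVM_IOMMU);
}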



[PATCH v4 14/32] iommu: introduce iommu_domain_alloc_type and the KVM type

2022-03-14 Thread Matthew Rosato
s390x will introduce an additional domain type that is used for
managing an IOMMU owned by KVM.  Define the type here and add an
interface for allocating a domain of a specified type rather than the
default type.

Signed-off-by: Matthew Rosato 
---
 drivers/iommu/iommu.c |  7 +++
 include/linux/iommu.h | 12 
 2 files changed, 19 insertions(+)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index f2c45b85b9fc..8bb57e0e3945 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1976,6 +1976,13 @@ void iommu_domain_free(struct iommu_domain *domain)
 }
 EXPORT_SYMBOL_GPL(iommu_domain_free);
 
+struct iommu_domain *iommu_domain_alloc_type(struct bus_type *bus,
+unsigned int t)
+{
+   return __iommu_domain_alloc(bus, t);
+}
+EXPORT_SYMBOL_GPL(iommu_domain_alloc_type);
+
 static int __iommu_attach_device(struct iommu_domain *domain,
 struct device *dev)
 {
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 9208eca4b0d1..b427bbb9f387 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -63,6 +63,7 @@ struct iommu_domain_geometry {
  implementation  */
 #define __IOMMU_DOMAIN_PT  (1U << 2)  /* Domain is identity mapped   */
 #define __IOMMU_DOMAIN_DMA_FQ  (1U << 3)  /* DMA-API uses flush queue*/
+#define __IOMMU_DOMAIN_KVM (1U << 4)  /* Domain is controlled by KVM */
 
 /*
  * This are the possible domain-types
@@ -77,6 +78,7 @@ struct iommu_domain_geometry {
  *   certain optimizations for these domains
  * IOMMU_DOMAIN_DMA_FQ - As above, but definitely using batched TLB
  *   invalidation.
+ * IOMMU_DOMAIN_KVM- DMA mappings managed by KVM, used for VMs
  */
 #define IOMMU_DOMAIN_BLOCKED   (0U)
 #define IOMMU_DOMAIN_IDENTITY  (__IOMMU_DOMAIN_PT)
@@ -86,6 +88,8 @@ struct iommu_domain_geometry {
 #define IOMMU_DOMAIN_DMA_FQ(__IOMMU_DOMAIN_PAGING |\
 __IOMMU_DOMAIN_DMA_API |   \
 __IOMMU_DOMAIN_DMA_FQ)
+#define IOMMU_DOMAIN_KVM   (__IOMMU_DOMAIN_PAGING |\
+__IOMMU_DOMAIN_KVM)
 
 struct iommu_domain {
unsigned type;
@@ -421,6 +425,8 @@ extern bool iommu_capable(struct bus_type *bus, enum iommu_cap cap);
 extern struct iommu_domain *iommu_domain_alloc(struct bus_type *bus);
 extern struct iommu_group *iommu_group_get_by_id(int id);
 extern void iommu_domain_free(struct iommu_domain *domain);
+extern struct iommu_domain *iommu_domain_alloc_type(struct bus_type *bus,
+   unsigned int t);
 extern int iommu_attach_device(struct iommu_domain *domain,
   struct device *dev);
 extern void iommu_detach_device(struct iommu_domain *domain,
@@ -708,6 +714,12 @@ static inline void iommu_domain_free(struct iommu_domain *domain)
 {
 }
 
+static inline struct iommu_domain *iommu_domain_alloc_type(struct bus_type *bus,
+  unsigned int t)
+{
+   return NULL;
+}
+
 static inline int iommu_attach_device(struct iommu_domain *domain,
  struct device *dev)
 {
-- 
2.27.0
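
For illustration, a caller could use the new interface with a fallback to the
default domain as sketched below (not part of the patch; the function name
and the fallback policy are assumptions):

#include <linux/iommu.h>

/* Illustrative sketch: ask for a KVM-managed domain, falling back
 * to the default domain type if the IOMMU driver rejects it. */
static struct iommu_domain *example_alloc_domain(struct bus_type *bus,
                                                 bool kvm_managed)
{
        struct iommu_domain *domain = NULL;

        if (kvm_managed)
                domain = iommu_domain_alloc_type(bus, IOMMU_DOMAIN_KVM);
        if (!domain)
                domain = iommu_domain_alloc(bus);
        return domain;
}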



[PATCH v4 13/32] s390/pci: return status from zpci_refresh_trans

2022-03-14 Thread Matthew Rosato
Current callers of zpci_refresh_trans don't need to interrogate the status
returned from the underlying instructions.  However, a subsequent patch
will add a KVM caller that needs this information.  Add a new argument to
zpci_refresh_trans to pass the address of a status byte and update
existing call sites to provide it.

Reviewed-by: Pierre Morel 
Reviewed-by: Claudio Imbrenda 
Reviewed-by: Niklas Schnelle 
Signed-off-by: Matthew Rosato 
---
 arch/s390/include/asm/pci_insn.h |  2 +-
 arch/s390/pci/pci_dma.c  |  6 --
 arch/s390/pci/pci_insn.c | 10 +-
 drivers/iommu/s390-iommu.c   |  4 +++-
 4 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h
index 5331082fa516..32759c407b8f 100644
--- a/arch/s390/include/asm/pci_insn.h
+++ b/arch/s390/include/asm/pci_insn.h
@@ -135,7 +135,7 @@ union zpci_sic_iib {
 DECLARE_STATIC_KEY_FALSE(have_mio);
 
 u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status);
-int zpci_refresh_trans(u64 fn, u64 addr, u64 range);
+int zpci_refresh_trans(u64 fn, u64 addr, u64 range, u8 *status);
 int __zpci_load(u64 *data, u64 req, u64 offset);
 int zpci_load(u64 *data, const volatile void __iomem *addr, unsigned long len);
 int __zpci_store(u64 data, u64 req, u64 offset);
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index a81de48d5ea7..b0a2380bcad8 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -23,8 +23,9 @@ static u32 s390_iommu_aperture_factor = 1;
 
 static int zpci_refresh_global(struct zpci_dev *zdev)
 {
+   u8 status;
return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma,
- zdev->iommu_pages * PAGE_SIZE);
+ zdev->iommu_pages * PAGE_SIZE, &status);
 }
 
 unsigned long *dma_alloc_cpu_table(void)
@@ -183,6 +184,7 @@ static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
   size_t size, int flags)
 {
unsigned long irqflags;
+   u8 status;
int ret;
 
/*
@@ -201,7 +203,7 @@ static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
}
 
ret = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr,
-PAGE_ALIGN(size));
+PAGE_ALIGN(size), &status);
if (ret == -ENOMEM && !s390_iommu_strict) {
/* enable the hypervisor to free some resources */
if (zpci_refresh_global(zdev))
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index 0509554301c7..ca6399d52767 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -77,20 +77,20 @@ static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status)
return cc;
 }
 
-int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
+int zpci_refresh_trans(u64 fn, u64 addr, u64 range, u8 *status)
 {
-   u8 cc, status;
+   u8 cc;
 
do {
-   cc = __rpcit(fn, addr, range, &status);
+   cc = __rpcit(fn, addr, range, status);
if (cc == 2)
udelay(ZPCI_INSN_BUSY_DELAY);
} while (cc == 2);
 
if (cc)
-   zpci_err_insn(cc, status, addr, range);
+   zpci_err_insn(cc, *status, addr, range);
 
-   if (cc == 1 && (status == 4 || status == 16))
+   if (cc == 1 && (*status == 4 || *status == 16))
return -ENOMEM;
 
return (cc) ? -EIO : 0;
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 3833e86c6e7b..73a85c599dc2 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -214,6 +214,7 @@ static int s390_iommu_update_trans(struct s390_domain *s390_domain,
unsigned long irq_flags, nr_pages, i;
unsigned long *entry;
int rc = 0;
+   u8 status;
 
if (dma_addr < s390_domain->domain.geometry.aperture_start ||
dma_addr + size > s390_domain->domain.geometry.aperture_end)
@@ -238,7 +239,8 @@ static int s390_iommu_update_trans(struct s390_domain *s390_domain,
spin_lock(&s390_domain->list_lock);
list_for_each_entry(domain_device, &s390_domain->devices, list) {
rc = zpci_refresh_trans((u64) domain_device->zdev->fh << 32,
-   start_dma_addr, nr_pages * PAGE_SIZE);
+   start_dma_addr, nr_pages * PAGE_SIZE,
+   &status);
if (rc)
break;
}
-- 
2.27.0
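
For illustration, the future KVM caller could consume the new status byte
along these lines (sketch only; the function name and the error reporting
are assumptions):

#include <linux/printk.h>
#include <asm/pci.h>
#include <asm/pci_insn.h>

/* Illustrative sketch: refresh a translation range and surface the
 * status byte when the instruction fails. */
static int example_refresh(struct zpci_dev *zdev, u64 addr, u64 range)
{
        u8 status;
        int rc;

        rc = zpci_refresh_trans((u64) zdev->fh << 32, addr, range, &status);
        if (rc)
                pr_warn("RPCIT failed: rc %d status %u\n", rc, status);
        return rc;
}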



[PATCH v4 12/32] s390/pci: get SHM information from list pci

2022-03-14 Thread Matthew Rosato
KVM will need information on the special handle mask used to indicate
emulated devices.  In order to obtain this, a new type of list pci call
must be made to gather the information.  Extend clp_list_pci_req to
also fetch the model-dependent-data field that holds this mask.

Reviewed-by: Niklas Schnelle 
Acked-by: Pierre Morel 
Signed-off-by: Matthew Rosato 
---
 arch/s390/include/asm/pci.h |  1 +
 arch/s390/include/asm/pci_clp.h |  2 +-
 arch/s390/pci/pci_clp.c | 25 ++---
 3 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 3c0b9986dcdc..e8a3fd5bc169 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -227,6 +227,7 @@ int clp_enable_fh(struct zpci_dev *zdev, u32 *fh, u8 nr_dma_as);
 int clp_disable_fh(struct zpci_dev *zdev, u32 *fh);
 int clp_get_state(u32 fid, enum zpci_state *state);
 int clp_refresh_fh(u32 fid, u32 *fh);
+int zpci_get_mdd(u32 *mdd);
 
 /* UID */
 void update_uid_checking(bool new);
diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h
index d6189ed14f84..dc2041e97de4 100644
--- a/arch/s390/include/asm/pci_clp.h
+++ b/arch/s390/include/asm/pci_clp.h
@@ -76,7 +76,7 @@ struct clp_req_list_pci {
 struct clp_rsp_list_pci {
struct clp_rsp_hdr hdr;
u64 resume_token;
-   u32 reserved2;
+   u32 mdd;
u16 max_fn;
u8  : 7;
u8 uid_checking : 1;
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index dc733b58e74f..7477956be632 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -328,7 +328,7 @@ int clp_disable_fh(struct zpci_dev *zdev, u32 *fh)
 }
 
 static int clp_list_pci_req(struct clp_req_rsp_list_pci *rrb,
-   u64 *resume_token, int *nentries)
+   u64 *resume_token, int *nentries, u32 *mdd)
 {
int rc;
 
@@ -354,6 +354,8 @@ static int clp_list_pci_req(struct clp_req_rsp_list_pci *rrb,
*nentries = (rrb->response.hdr.len - LIST_PCI_HDR_LEN) /
rrb->response.entry_size;
*resume_token = rrb->response.resume_token;
+   if (mdd)
+   *mdd = rrb->response.mdd;
 
return rc;
 }
@@ -365,7 +367,7 @@ static int clp_list_pci(struct clp_req_rsp_list_pci *rrb, void *data,
int nentries, i, rc;
 
do {
-   rc = clp_list_pci_req(rrb, &resume_token, &nentries);
+   rc = clp_list_pci_req(rrb, &resume_token, &nentries, NULL);
if (rc)
return rc;
for (i = 0; i < nentries; i++)
@@ -383,7 +385,7 @@ static int clp_find_pci(struct clp_req_rsp_list_pci *rrb, u32 fid,
int nentries, i, rc;
 
do {
-   rc = clp_list_pci_req(rrb, &resume_token, &nentries);
+   rc = clp_list_pci_req(rrb, &resume_token, &nentries, NULL);
if (rc)
return rc;
fh_list = rrb->response.fh_list;
@@ -468,6 +470,23 @@ int clp_get_state(u32 fid, enum zpci_state *state)
return rc;
 }
 
+int zpci_get_mdd(u32 *mdd)
+{
+   struct clp_req_rsp_list_pci *rrb;
+   u64 resume_token = 0;
+   int nentries, rc;
+
+   rrb = clp_alloc_block(GFP_KERNEL);
+   if (!rrb)
+   return -ENOMEM;
+
+   rc = clp_list_pci_req(rrb, &resume_token, &nentries, mdd);
+
+   clp_free_block(rrb);
+   return rc;
+}
+EXPORT_SYMBOL_GPL(zpci_get_mdd);
+
 static int clp_base_slpc(struct clp_req *req, struct clp_req_rsp_slpc *lpcb)
 {
unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
-- 
2.27.0
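
For illustration, a consumer of the new export might simply do the following
at initialization time (sketch; the surrounding function is an assumption,
and how the emulated-device handle mask is derived from mdd is left to the
later KVM patches):

#include <linux/printk.h>
#include <asm/pci.h>

/* Illustrative sketch: fetch the model-dependent data once. */
static int example_init_mdd(u32 *mdd)
{
        int rc = zpci_get_mdd(mdd);

        if (rc)
                pr_warn("CLP list PCI failed: rc %d\n", rc);
        return rc;
}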



[PATCH v4 11/32] s390/pci: add helper function to find device by handle

2022-03-14 Thread Matthew Rosato
Intercepted zPCI instructions will specify the desired function via a
function handle.  Add a routine to find the device with the specified
handle.

Acked-by: Niklas Schnelle 
Reviewed-by: Christian Borntraeger 
Reviewed-by: Eric Farman 
Reviewed-by: Pierre Morel 
Signed-off-by: Matthew Rosato 
---
 arch/s390/include/asm/pci.h |  1 +
 arch/s390/pci/pci.c | 16 
 2 files changed, 17 insertions(+)

diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 7ee52a70a96f..3c0b9986dcdc 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -275,6 +275,7 @@ static inline struct zpci_dev *to_zpci_dev(struct device *dev)
 }
 
 struct zpci_dev *get_zdev_by_fid(u32);
+struct zpci_dev *get_zdev_by_fh(u32 fh);
 
 /* DMA */
 int zpci_dma_init(void);
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index ca9c29386de6..04c16312ad54 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -76,6 +76,22 @@ struct zpci_dev *get_zdev_by_fid(u32 fid)
return zdev;
 }
 
+struct zpci_dev *get_zdev_by_fh(u32 fh)
+{
+   struct zpci_dev *tmp, *zdev = NULL;
+
+   spin_lock(&zpci_list_lock);
+   list_for_each_entry(tmp, &zpci_list, entry) {
+   if (tmp->fh == fh) {
+   zdev = tmp;
+   break;
+   }
+   }
+   spin_unlock(&zpci_list_lock);
+   return zdev;
+}
+EXPORT_SYMBOL_GPL(get_zdev_by_fh);
+
 void zpci_remove_reserved_devices(void)
 {
struct zpci_dev *tmp, *zdev;
-- 
2.27.0
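
For illustration, an intercept handler might use the helper as sketched
below.  Note that, as written, get_zdev_by_fh() takes no reference on the
returned device, so the caller must guarantee its lifetime by other means.
The function name and body here are assumptions, not part of the patch.

#include <linux/errno.h>
#include <asm/pci.h>

/* Illustrative sketch: resolve the function handle supplied by an
 * intercepted zPCI instruction. */
static int example_handle_intercept(u32 fh)
{
        struct zpci_dev *zdev = get_zdev_by_fh(fh);

        if (!zdev)
                return -ENODEV; /* no device with this handle */

        /* ... operate on zdev ... */
        return 0;
}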



[PATCH v4 10/32] s390/pci: stash dtsm and maxstbl

2022-03-14 Thread Matthew Rosato
Store information about which IOAT designation types are supported by the
underlying hardware, as well as the largest store block size allowed.
These values will be needed for zPCI passthrough.

Reviewed-by: Niklas Schnelle 
Reviewed-by: Pierre Morel 
Reviewed-by: Christian Borntraeger 
Signed-off-by: Matthew Rosato 
---
 arch/s390/include/asm/pci.h | 2 ++
 arch/s390/include/asm/pci_clp.h | 6 --
 arch/s390/pci/pci_clp.c | 2 ++
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index d07d7c3205de..7ee52a70a96f 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -126,9 +126,11 @@ struct zpci_dev {
u32 gisa;   /* GISA designation for passthrough */
u16 vfn;/* virtual function number */
u16 pchid;  /* physical channel ID */
+   u16 maxstbl;/* Maximum store block size */
u8  pfgid;  /* function group ID */
u8  pft;/* pci function type */
u8  port;
+   u8  dtsm;   /* Supported DT mask */
u8  rid_available   : 1;
u8  has_hp_slot : 1;
u8  has_resources   : 1;
diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h
index f3286bc5ba6e..d6189ed14f84 100644
--- a/arch/s390/include/asm/pci_clp.h
+++ b/arch/s390/include/asm/pci_clp.h
@@ -153,9 +153,11 @@ struct clp_rsp_query_pci_grp {
u8  :  6;
u8 frame:  1;
u8 refresh  :  1;   /* TLB refresh mode */
-   u16 reserved2;
+   u16 :  3;
+   u16 maxstbl : 13;   /* Maximum store block size */
u16 mui;
-   u16 : 16;
+   u8 dtsm;/* Supported DT mask */
+   u8 reserved3;
u16 maxfaal;
u16 :  4;
u16 dnoi: 12;
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 4dcc37ddeeaf..dc733b58e74f 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -103,6 +103,8 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev,
zdev->max_msi = response->noi;
zdev->fmb_update = response->mui;
zdev->version = response->version;
+   zdev->maxstbl = response->maxstbl;
+   zdev->dtsm = response->dtsm;
 
switch (response->version) {
case 1:
-- 
2.27.0
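
For illustration, one eventual consumer of the stashed values might clamp a
guest-requested store-block length as sketched below (the helper name and
the clamping policy are made up for the example):

#include <linux/minmax.h>
#include <asm/pci.h>

/* Illustrative sketch: never let a store block exceed what the
 * function group reports as its maximum. */
static u64 example_clamp_stbl(struct zpci_dev *zdev, u64 len)
{
        return min_t(u64, len, zdev->maxstbl);
}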



[PATCH v4 09/32] s390/pci: export some routines related to RPCIT processing

2022-03-14 Thread Matthew Rosato
KVM will re-use dma_walk_cpu_trans to walk the host shadow table and
will also need to be able to call zpci_refresh_trans to re-issue an RPCIT.

Reviewed-by: Niklas Schnelle 
Reviewed-by: Pierre Morel 
Acked-by: Christian Borntraeger 
Signed-off-by: Matthew Rosato 
---
 arch/s390/pci/pci_dma.c  | 1 +
 arch/s390/pci/pci_insn.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index f46833a25526..a81de48d5ea7 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -116,6 +116,7 @@ unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
px = calc_px(dma_addr);
return &pto[px];
 }
+EXPORT_SYMBOL_GPL(dma_walk_cpu_trans);
 
 void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags)
 {
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index 2a47b3936e44..0509554301c7 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -95,6 +95,7 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
 
return (cc) ? -EIO : 0;
 }
+EXPORT_SYMBOL_GPL(zpci_refresh_trans);
 
 /* Set Interruption Controls */
 int zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib)
-- 
2.27.0



[PATCH v4 08/32] s390/pci: stash associated GISA designation

2022-03-14 Thread Matthew Rosato
For passthrough devices, we will need to know the GISA designation of the
guest if interpretation facilities are to be used.  Set up to stash this in
the zdev and set a default of 0 (no GISA designation) for now; a subsequent
patch will set a valid GISA designation for passthrough devices.
Also, extend the mpcifc routines to specify this stashed designation as part
of the mpcifc command.

Reviewed-by: Niklas Schnelle 
Reviewed-by: Christian Borntraeger 
Reviewed-by: Eric Farman 
Reviewed-by: Pierre Morel 
Signed-off-by: Matthew Rosato 
---
 arch/s390/include/asm/pci.h | 1 +
 arch/s390/include/asm/pci_clp.h | 3 ++-
 arch/s390/pci/pci.c | 6 ++
 arch/s390/pci/pci_clp.c | 1 +
 arch/s390/pci/pci_irq.c | 5 +
 5 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 90824be5ce9a..d07d7c3205de 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -123,6 +123,7 @@ struct zpci_dev {
enum zpci_state state;
u32 fid;/* function ID, used by sclp */
u32 fh; /* function handle, used by insn's */
+   u32 gisa;   /* GISA designation for passthrough */
u16 vfn;/* virtual function number */
u16 pchid;  /* physical channel ID */
u8  pfgid;  /* function group ID */
diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h
index 1f4b666e85ee..f3286bc5ba6e 100644
--- a/arch/s390/include/asm/pci_clp.h
+++ b/arch/s390/include/asm/pci_clp.h
@@ -173,7 +173,8 @@ struct clp_req_set_pci {
u16 reserved2;
u8 oc;  /* operation controls */
u8 ndas;/* number of dma spaces */
-   u64 reserved3;
+   u32 reserved3;
+   u32 gisa;   /* GISA designation */
 } __packed;
 
 /* Set PCI function response */
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 792f8e0f2178..ca9c29386de6 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -119,6 +119,7 @@ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
fib.pba = base;
fib.pal = limit;
fib.iota = iota | ZPCI_IOTA_RTTO_FLAG;
+   fib.gd = zdev->gisa;
cc = zpci_mod_fc(req, &fib, &status);
if (cc)
		zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status);
@@ -132,6 +133,8 @@ int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
struct zpci_fib fib = {0};
u8 cc, status;
 
+   fib.gd = zdev->gisa;
+
cc = zpci_mod_fc(req, &fib, &status);
if (cc)
		zpci_dbg(3, "unreg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status);
@@ -159,6 +162,7 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev)
atomic64_set(&zdev->unmapped_pages, 0);
 
fib.fmb_addr = virt_to_phys(zdev->fmb);
+   fib.gd = zdev->gisa;
cc = zpci_mod_fc(req, &fib, &status);
if (cc) {
kmem_cache_free(zdev_fmb_cache, zdev->fmb);
@@ -177,6 +181,8 @@ int zpci_fmb_disable_device(struct zpci_dev *zdev)
if (!zdev->fmb)
return -EINVAL;
 
+   fib.gd = zdev->gisa;
+
/* Function measurement is disabled if fmb address is zero */
cc = zpci_mod_fc(req, &fib, &status);
if (cc == 3) /* Function already gone. */
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index be077b39da33..4dcc37ddeeaf 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -240,6 +240,7 @@ static int clp_set_pci_fn(struct zpci_dev *zdev, u32 *fh, u8 nr_dma_as, u8 command)
rrb->request.fh = zdev->fh;
rrb->request.oc = command;
rrb->request.ndas = nr_dma_as;
+   rrb->request.gisa = zdev->gisa;
 
rc = clp_req(rrb, CLP_LPS_PCI);
if (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY) {
diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
index 2f675355fd0c..a19ac0282929 100644
--- a/arch/s390/pci/pci_irq.c
+++ b/arch/s390/pci/pci_irq.c
@@ -43,6 +43,7 @@ static int zpci_set_airq(struct zpci_dev *zdev)
fib.fmt0.aibvo = 0; /* each zdev has its own interrupt vector */
fib.fmt0.aisb = virt_to_phys(zpci_sbv->vector) + (zdev->aisb / 64) * 8;
fib.fmt0.aisbo = zdev->aisb & 63;
+   fib.gd = zdev->gisa;
 
return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
 }
@@ -54,6 +55,8 @@ static int zpci_clear_airq(struct zpci_dev *zdev)
struct zpci_fib fib = {0};
u8 cc, status;
 
+   fib.gd = zdev->gisa;
+
cc = zpci_mod_fc(req, &fib, &status);
if (cc == 3 || (cc == 1 && status == 24))
/* Function already gone or IRQs already deregistered. */
@@ -72,6 +75,7 @@ static int zpci_set_directed_irq(struct zpci_dev *zdev)
  

[PATCH v4 07/32] s390/pci: externalize the SIC operation controls and routine

2022-03-14 Thread Matthew Rosato
A subsequent patch will be issuing SIC from KVM -- export the necessary
routine and make the operation control definitions available from a header.
Because the routine will now be exported, let's rename __zpci_set_irq_ctrl
to zpci_set_irq_ctrl and get rid of the zeroed-iib wrapper function of
the same name.

Reviewed-by: Niklas Schnelle 
Reviewed-by: Claudio Imbrenda 
Reviewed-by: Pierre Morel 
Signed-off-by: Matthew Rosato 
---
 arch/s390/include/asm/pci_insn.h | 17 +
 arch/s390/pci/pci_insn.c |  3 ++-
 arch/s390/pci/pci_irq.c  | 26 --
 3 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h
index 61cf9531f68f..5331082fa516 100644
--- a/arch/s390/include/asm/pci_insn.h
+++ b/arch/s390/include/asm/pci_insn.h
@@ -98,6 +98,14 @@ struct zpci_fib {
u32 gd;
 } __packed __aligned(8);
 
+/* Set Interruption Controls Operation Controls  */
+#defineSIC_IRQ_MODE_ALL0
+#defineSIC_IRQ_MODE_SINGLE 1
+#defineSIC_IRQ_MODE_DIRECT 4
+#defineSIC_IRQ_MODE_D_ALL  16
+#defineSIC_IRQ_MODE_D_SINGLE   17
+#defineSIC_IRQ_MODE_SET_CPU18
+
 /* directed interruption information block */
 struct zpci_diib {
u32 : 1;
@@ -134,13 +142,6 @@ int __zpci_store(u64 data, u64 req, u64 offset);
 int zpci_store(const volatile void __iomem *addr, u64 data, unsigned long len);
 int __zpci_store_block(const u64 *data, u64 req, u64 offset);
 void zpci_barrier(void);
-int __zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib);
-
-static inline int zpci_set_irq_ctrl(u16 ctl, u8 isc)
-{
-   union zpci_sic_iib iib = {{0}};
-
-   return __zpci_set_irq_ctrl(ctl, isc, &iib);
-}
+int zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib);
 
 #endif
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index 4dd58b196cea..2a47b3936e44 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -97,7 +97,7 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
 }
 
 /* Set Interruption Controls */
-int __zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib)
+int zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib)
 {
if (!test_facility(72))
return -EIO;
@@ -108,6 +108,7 @@ int __zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib)
 
return 0;
 }
+EXPORT_SYMBOL_GPL(zpci_set_irq_ctrl);
 
 /* PCI Load */
 static inline int pcilg(u64 *data, u64 req, u64 offset, u8 *status)
diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
index 0d0a02a9fbbf..2f675355fd0c 100644
--- a/arch/s390/pci/pci_irq.c
+++ b/arch/s390/pci/pci_irq.c
@@ -15,13 +15,6 @@
 
 static enum {FLOATING, DIRECTED} irq_delivery;
 
-#defineSIC_IRQ_MODE_ALL0
-#defineSIC_IRQ_MODE_SINGLE 1
-#defineSIC_IRQ_MODE_DIRECT 4
-#defineSIC_IRQ_MODE_D_ALL  16
-#defineSIC_IRQ_MODE_D_SINGLE   17
-#defineSIC_IRQ_MODE_SET_CPU18
-
 /*
  * summary bit vector
  * FLOATING - summary bit per function
@@ -154,6 +147,7 @@ static struct irq_chip zpci_irq_chip = {
 static void zpci_handle_cpu_local_irq(bool rescan)
 {
struct airq_iv *dibv = zpci_ibv[smp_processor_id()];
+   union zpci_sic_iib iib = {{0}};
unsigned long bit;
int irqs_on = 0;
 
@@ -165,7 +159,7 @@ static void zpci_handle_cpu_local_irq(bool rescan)
/* End of second scan with interrupts on. */
break;
/* First scan complete, reenable interrupts. */
-   if (zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC))
+   if (zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC, &iib))
break;
bit = 0;
continue;
@@ -193,6 +187,7 @@ static void zpci_handle_remote_irq(void *data)
 static void zpci_handle_fallback_irq(void)
 {
struct cpu_irq_data *cpu_data;
+   union zpci_sic_iib iib = {{0}};
unsigned long cpu;
int irqs_on = 0;
 
@@ -203,7 +198,7 @@ static void zpci_handle_fallback_irq(void)
/* End of second scan with interrupts on. */
break;
/* First scan complete, reenable interrupts. */
-   if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC))
+   if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC, &iib))
break;
cpu = 0;
continue;
@@ -234,6 +229,7 @@ static void zpci_directed_irq_handler(struct airq_struct *airq,
 static void zpci_floating_irq_handler(struct airq_struct *airq,
  struct tpi_inf

[PATCH v4 06/32] s390/airq: allow for airq structure that uses an input vector

2022-03-14 Thread Matthew Rosato
When doing device passthrough where interrupts are being forwarded from
host to guest, we wish to use a pinned section of guest memory as the
vector (i.e. the same memory the guest itself uses as the vector). To
accomplish this, add a new parameter to airq_iv_create that allows passing
an existing vector to be used instead of allocating a new one. The caller
is responsible for ensuring the vector is pinned in memory as well as for
unpinning the memory when the vector is no longer needed.

A subsequent patch will use this new parameter for zPCI interpretation.

Reviewed-by: Pierre Morel 
Reviewed-by: Claudio Imbrenda 
Acked-by: Cornelia Huck 
Signed-off-by: Matthew Rosato 
---
 arch/s390/include/asm/airq.h |  4 +++-
 arch/s390/pci/pci_irq.c  |  8 
 drivers/s390/cio/airq.c  | 10 +++---
 drivers/s390/virtio/virtio_ccw.c |  2 +-
 4 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h
index 7918a7d09028..e82e5626e139 100644
--- a/arch/s390/include/asm/airq.h
+++ b/arch/s390/include/asm/airq.h
@@ -47,8 +47,10 @@ struct airq_iv {
 #define AIRQ_IV_PTR4   /* Allocate the ptr array */
 #define AIRQ_IV_DATA   8   /* Allocate the data array */
 #define AIRQ_IV_CACHELINE  16  /* Cacheline alignment for the vector */
+#define AIRQ_IV_GUESTVEC   32  /* Vector is a pinned guest page */
 
-struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags);
+struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags,
+  unsigned long *vec);
 void airq_iv_release(struct airq_iv *iv);
 unsigned long airq_iv_alloc(struct airq_iv *iv, unsigned long num);
 void airq_iv_free(struct airq_iv *iv, unsigned long bit, unsigned long num);
diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
index cc4c8d7c8f5c..0d0a02a9fbbf 100644
--- a/arch/s390/pci/pci_irq.c
+++ b/arch/s390/pci/pci_irq.c
@@ -296,7 +296,7 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int 
type)
zdev->aisb = bit;
 
/* Create adapter interrupt vector */
-   zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK);
+   zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK, NULL);
if (!zdev->aibv)
return -ENOMEM;
 
@@ -419,7 +419,7 @@ static int __init zpci_directed_irq_init(void)
union zpci_sic_iib iib = {{0}};
unsigned int cpu;
 
-   zpci_sbv = airq_iv_create(num_possible_cpus(), 0);
+   zpci_sbv = airq_iv_create(num_possible_cpus(), 0, NULL);
if (!zpci_sbv)
return -ENOMEM;
 
@@ -441,7 +441,7 @@ static int __init zpci_directed_irq_init(void)
	zpci_ibv[cpu] = airq_iv_create(cache_line_size() * BITS_PER_BYTE,
   AIRQ_IV_DATA |
   AIRQ_IV_CACHELINE |
-  (!cpu ? AIRQ_IV_ALLOC : 0));
+  (!cpu ? AIRQ_IV_ALLOC : 0), NULL);
if (!zpci_ibv[cpu])
return -ENOMEM;
}
@@ -458,7 +458,7 @@ static int __init zpci_floating_irq_init(void)
if (!zpci_ibv)
return -ENOMEM;
 
-   zpci_sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC);
+   zpci_sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC, NULL);
if (!zpci_sbv)
goto out_free;
 
diff --git a/drivers/s390/cio/airq.c b/drivers/s390/cio/airq.c
index 2f2226786319..375a58b1c838 100644
--- a/drivers/s390/cio/airq.c
+++ b/drivers/s390/cio/airq.c
@@ -122,10 +122,12 @@ static inline unsigned long iv_size(unsigned long bits)
  * airq_iv_create - create an interrupt vector
  * @bits: number of bits in the interrupt vector
  * @flags: allocation flags
+ * @vec: pointer to pinned guest memory if AIRQ_IV_GUESTVEC
  *
  * Returns a pointer to an interrupt vector structure
  */
-struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags)
+struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags,
+  unsigned long *vec)
 {
struct airq_iv *iv;
unsigned long size;
@@ -146,6 +148,8 @@ struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags)
 &iv->vector_dma);
if (!iv->vector)
goto out_free;
+   } else if (flags & AIRQ_IV_GUESTVEC) {
+   iv->vector = vec;
} else {
iv->vector = cio_dma_zalloc(size);
if (!iv->vector)
@@ -185,7 +189,7 @@ struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags)
kfree(iv->avail);
if (iv->flags & AIRQ_IV_CACHELINE && iv->vector)
dma_pool_free(airq_iv_cache, iv->vector, iv->vector_dma);
-   else

[PATCH v4 05/32] s390/airq: pass more TPI info to airq handlers

2022-03-14 Thread Matthew Rosato
A subsequent patch will introduce an airq handler that requires additional
TPI information beyond directed vs. floating, so pass the entire tpi_info
structure to the handler.  Only PCI actually uses this information today;
for the other airq handlers this is effectively a no-op.

Reviewed-by: Eric Farman 
Reviewed-by: Claudio Imbrenda 
Reviewed-by: Pierre Morel 
Reviewed-by: Thomas Huth 
Acked-by: Christian Borntraeger 
Acked-by: Cornelia Huck 
Signed-off-by: Matthew Rosato 
---
 arch/s390/include/asm/airq.h | 3 ++-
 arch/s390/kvm/interrupt.c| 4 +++-
 arch/s390/pci/pci_irq.c  | 9 +++--
 drivers/s390/cio/airq.c  | 2 +-
 drivers/s390/cio/qdio_thinint.c  | 6 --
 drivers/s390/crypto/ap_bus.c | 9 ++---
 drivers/s390/virtio/virtio_ccw.c | 4 +++-
 7 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h
index 01936fdfaddb..7918a7d09028 100644
--- a/arch/s390/include/asm/airq.h
+++ b/arch/s390/include/asm/airq.h
@@ -12,10 +12,11 @@
 
 #include 
 #include 
+#include 
 
 struct airq_struct {
struct hlist_node list; /* Handler queueing. */
-   void (*handler)(struct airq_struct *airq, bool floating);
+   void (*handler)(struct airq_struct *airq, struct tpi_info *tpi_info);
u8 *lsi_ptr;/* Local-Summary-Indicator pointer */
u8 lsi_mask;/* Local-Summary-Indicator mask */
u8 isc; /* Interrupt-subclass */
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index db933c252dbc..65e75ca2fc5d 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "kvm-s390.h"
 #include "gaccess.h"
 #include "trace-s390.h"
@@ -3269,7 +3270,8 @@ int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc)
 }
 EXPORT_SYMBOL_GPL(kvm_s390_gisc_unregister);
 
-static void gib_alert_irq_handler(struct airq_struct *airq, bool floating)
+static void gib_alert_irq_handler(struct airq_struct *airq,
+ struct tpi_info *tpi_info)
 {
inc_irq_stat(IRQIO_GAL);
process_gib_alert_list();
diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
index 2b6062c486f5..cc4c8d7c8f5c 100644
--- a/arch/s390/pci/pci_irq.c
+++ b/arch/s390/pci/pci_irq.c
@@ -11,6 +11,7 @@
 
 #include 
 #include 
+#include 
 
 static enum {FLOATING, DIRECTED} irq_delivery;
 
@@ -216,8 +217,11 @@ static void zpci_handle_fallback_irq(void)
}
 }
 
-static void zpci_directed_irq_handler(struct airq_struct *airq, bool floating)
+static void zpci_directed_irq_handler(struct airq_struct *airq,
+ struct tpi_info *tpi_info)
 {
+   bool floating = !tpi_info->directed_irq;
+
if (floating) {
inc_irq_stat(IRQIO_PCF);
zpci_handle_fallback_irq();
@@ -227,7 +231,8 @@ static void zpci_directed_irq_handler(struct airq_struct *airq, bool floating)
}
 }
 
-static void zpci_floating_irq_handler(struct airq_struct *airq, bool floating)
+static void zpci_floating_irq_handler(struct airq_struct *airq,
+ struct tpi_info *tpi_info)
 {
unsigned long si, ai;
struct airq_iv *aibv;
diff --git a/drivers/s390/cio/airq.c b/drivers/s390/cio/airq.c
index e56535c99888..2f2226786319 100644
--- a/drivers/s390/cio/airq.c
+++ b/drivers/s390/cio/airq.c
@@ -99,7 +99,7 @@ static irqreturn_t do_airq_interrupt(int irq, void *dummy)
rcu_read_lock();
hlist_for_each_entry_rcu(airq, head, list)
if ((*airq->lsi_ptr & airq->lsi_mask) != 0)
-   airq->handler(airq, !tpi_info->directed_irq);
+   airq->handler(airq, tpi_info);
rcu_read_unlock();
 
return IRQ_HANDLED;
diff --git a/drivers/s390/cio/qdio_thinint.c b/drivers/s390/cio/qdio_thinint.c
index 8e09bf3a2fcd..9b9335dd06db 100644
--- a/drivers/s390/cio/qdio_thinint.c
+++ b/drivers/s390/cio/qdio_thinint.c
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "cio.h"
 #include "ioasm.h"
@@ -93,9 +94,10 @@ static inline u32 clear_shared_ind(void)
 /**
  * tiqdio_thinint_handler - thin interrupt handler for qdio
  * @airq: pointer to adapter interrupt descriptor
- * @floating: flag to recognize floating vs. directed interrupts (unused)
+ * @tpi_info: interrupt information (e.g. floating vs directed -- unused)
  */
-static void tiqdio_thinint_handler(struct airq_struct *airq, bool floating)
+static void tiqdio_thinint_handler(struct airq_struct *airq,
+  struct tpi_info *tpi_info)
 {
u64 irq_time = S390_lowcore.int_clock;
u32 si_used = clear_shared_ind();
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index 1986243f9cd3..df1a038442db 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap

[PATCH v4 04/32] s390/sclp: detect the AISI facility

2022-03-14 Thread Matthew Rosato
Detect the Adapter Interruption Suppression Interpretation facility.

Reviewed-by: Eric Farman 
Reviewed-by: Christian Borntraeger 
Reviewed-by: Claudio Imbrenda 
Signed-off-by: Matthew Rosato 
---
 arch/s390/include/asm/sclp.h   | 1 +
 drivers/s390/char/sclp_early.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index 8c2e142000d4..33b174007848 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -91,6 +91,7 @@ struct sclp_info {
unsigned char has_zpci_lsi : 1;
unsigned char has_aisii : 1;
unsigned char has_aeni : 1;
+   unsigned char has_aisi : 1;
unsigned int ibc;
unsigned int mtid;
unsigned int mtid_cp;
diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c
index e9af01b4c97a..c13e55cc4a5d 100644
--- a/drivers/s390/char/sclp_early.c
+++ b/drivers/s390/char/sclp_early.c
@@ -47,6 +47,7 @@ static void __init sclp_early_facilities_detect(void)
sclp.has_kss = !!(sccb->fac98 & 0x01);
sclp.has_aisii = !!(sccb->fac118 & 0x40);
sclp.has_aeni = !!(sccb->fac118 & 0x20);
+   sclp.has_aisi = !!(sccb->fac118 & 0x10);
sclp.has_zpci_lsi = !!(sccb->fac118 & 0x01);
if (sccb->fac85 & 0x02)
S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP;
-- 
2.27.0



[PATCH v4 03/32] s390/sclp: detect the AENI facility

2022-03-14 Thread Matthew Rosato
Detect the Adapter Event Notification Interpretation facility.

Reviewed-by: Eric Farman 
Reviewed-by: Christian Borntraeger 
Reviewed-by: Claudio Imbrenda 
Signed-off-by: Matthew Rosato 
---
 arch/s390/include/asm/sclp.h   | 1 +
 drivers/s390/char/sclp_early.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index 8b56ac5ae496..8c2e142000d4 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -90,6 +90,7 @@ struct sclp_info {
unsigned char has_dirq : 1;
unsigned char has_zpci_lsi : 1;
unsigned char has_aisii : 1;
+   unsigned char has_aeni : 1;
unsigned int ibc;
unsigned int mtid;
unsigned int mtid_cp;
diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c
index 29fee179e197..e9af01b4c97a 100644
--- a/drivers/s390/char/sclp_early.c
+++ b/drivers/s390/char/sclp_early.c
@@ -46,6 +46,7 @@ static void __init sclp_early_facilities_detect(void)
sclp.has_hvs = !!(sccb->fac119 & 0x80);
sclp.has_kss = !!(sccb->fac98 & 0x01);
sclp.has_aisii = !!(sccb->fac118 & 0x40);
+   sclp.has_aeni = !!(sccb->fac118 & 0x20);
sclp.has_zpci_lsi = !!(sccb->fac118 & 0x01);
if (sccb->fac85 & 0x02)
S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP;
-- 
2.27.0



[PATCH v4 02/32] s390/sclp: detect the AISII facility

2022-03-14 Thread Matthew Rosato
Detect the Adapter Interruption Source ID Interpretation facility.

Reviewed-by: Eric Farman 
Reviewed-by: Christian Borntraeger 
Reviewed-by: Claudio Imbrenda 
Signed-off-by: Matthew Rosato 
---
 arch/s390/include/asm/sclp.h   | 1 +
 drivers/s390/char/sclp_early.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index 58a4d3d354b7..8b56ac5ae496 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -89,6 +89,7 @@ struct sclp_info {
unsigned char has_sipl : 1;
unsigned char has_dirq : 1;
unsigned char has_zpci_lsi : 1;
+   unsigned char has_aisii : 1;
unsigned int ibc;
unsigned int mtid;
unsigned int mtid_cp;
diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c
index b88dd0da1231..29fee179e197 100644
--- a/drivers/s390/char/sclp_early.c
+++ b/drivers/s390/char/sclp_early.c
@@ -45,6 +45,7 @@ static void __init sclp_early_facilities_detect(void)
sclp.has_gisaf = !!(sccb->fac118 & 0x08);
sclp.has_hvs = !!(sccb->fac119 & 0x80);
sclp.has_kss = !!(sccb->fac98 & 0x01);
+   sclp.has_aisii = !!(sccb->fac118 & 0x40);
sclp.has_zpci_lsi = !!(sccb->fac118 & 0x01);
if (sccb->fac85 & 0x02)
S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP;
-- 
2.27.0



[PATCH v4 01/32] s390/sclp: detect the zPCI load/store interpretation facility

2022-03-14 Thread Matthew Rosato
Detect the zPCI Load/Store Interpretation facility.

Reviewed-by: Eric Farman 
Reviewed-by: Christian Borntraeger 
Reviewed-by: Claudio Imbrenda 
Signed-off-by: Matthew Rosato 
---
 arch/s390/include/asm/sclp.h   | 1 +
 drivers/s390/char/sclp_early.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index c68ea35de498..58a4d3d354b7 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -88,6 +88,7 @@ struct sclp_info {
unsigned char has_diag318 : 1;
unsigned char has_sipl : 1;
unsigned char has_dirq : 1;
+   unsigned char has_zpci_lsi : 1;
unsigned int ibc;
unsigned int mtid;
unsigned int mtid_cp;
diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c
index e9943a86c361..b88dd0da1231 100644
--- a/drivers/s390/char/sclp_early.c
+++ b/drivers/s390/char/sclp_early.c
@@ -45,6 +45,7 @@ static void __init sclp_early_facilities_detect(void)
sclp.has_gisaf = !!(sccb->fac118 & 0x08);
sclp.has_hvs = !!(sccb->fac119 & 0x80);
sclp.has_kss = !!(sccb->fac98 & 0x01);
+   sclp.has_zpci_lsi = !!(sccb->fac118 & 0x01);
if (sccb->fac85 & 0x02)
S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP;
if (sccb->fac91 & 0x40)
-- 
2.27.0
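
For illustration, later patches can gate interpretive execution on the new
flag with a check along these lines (sketch only; the helper name is made
up):

#include <linux/types.h>
#include <asm/sclp.h>

/* Illustrative sketch: zPCI interpretation is only possible when
 * the machine offers the load/store interpretation facility. */
static bool example_zpci_interp_possible(void)
{
        return sclp.has_zpci_lsi;
}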



[PATCH v4 00/32] KVM: s390: enable zPCI for interpretive execution

2022-03-14 Thread Matthew Rosato
Note: A few patches in this series are dependent on Baolu's IOMMU domain ops
split, which is currently in the next branch of linux-iommu. This series
applies on top: 
https://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git 

Enable interpretive execution of zPCI instructions + adapter interruption
forwarding for s390x KVM vfio-pci.  This is done by introducing a new IOMMU
domain for s390x (KVM-managed), indicating via vfio that this IOMMU domain
should be used instead of the default, with subsequent management of the
hardware assists being handled via a new KVM ioctl for zPCI management.

By allowing interpretation of zPCI instructions and firmware delivery of 
interrupts to guests, we can significantly reduce the frequency of guest
SIE exits for zPCI.  We then see additional gains by handling a hot-path
instruction that can still intercept to the hypervisor (RPCIT) directly 
in KVM via the new IOMMU domain, whose map operations update the host   
DMA table with pinned guest entries over the specified range.

From the perspective of guest configuration, you pass through zPCI devices
in the same manner as before, with interpretation support being used by
default if available in kernel+qemu.

Will reply with a link to the associated QEMU series.

Changelog v3->v4:
v3: https://lore.kernel.org/kvm/20220204211536.321475-1-mjros...@linux.ibm.com/ 
- Significant overhaul of the userspace API.  Remove all vfio device
  feature ioctls.  Remove CONFIG_VFIO_PCI_ZDEV; this is once again always
  built with vfio-pci for s390, and IS_ENABLED checks can instead look at
  CONFIG_VFIO_PCI.  Most earlier patches in the series could keep their
  reviews, but some reviews had to be dropped due to required code
  changes.
- Instead use a KVM ioctl for zPCI management.  The API is very similar
  to the feature ioctls used in the prior series, with an additional step
  to create an association between an iommu domain + KVM + zPCI device.
- Introduce a new iommu domain ops type for s390-iommu, to be used when 
  KVM manages the IOMMU instead of in response to VFIO mapping ioctls 
- Add a iommu method for specifying the type of domain to allocate
- Add a new type to vfio_iommu_type1 (KVM-owned) to trigger the allocation
  of the KVM-owned IOMMU domain when zPCI interpretation is requested.
  In this case, the KVM-owned type is specified on VFIO_SET_IOMMU. 
- Wire the RPCIT intercepts into the new IOMMU domain via the kernel
  IOMMU API 
- Remove a bunch of unnecessary symbol externs, make the associated
  functions static
- Now that we keep a list of zPCI associated with a given KVM, we can do
  fh lookup on this list vs the list of all zPCI on the host.  We only
  need to do a host-wide fh lookup during the initial device<->KVM
  association.


Matthew Rosato (32):
  s390/sclp: detect the zPCI load/store interpretation facility
  s390/sclp: detect the AISII facility
  s390/sclp: detect the AENI facility
  s390/sclp: detect the AISI facility
  s390/airq: pass more TPI info to airq handlers
  s390/airq: allow for airq structure that uses an input vector
  s390/pci: externalize the SIC operation controls and routine
  s390/pci: stash associated GISA designation
  s390/pci: export some routines related to RPCIT processing
  s390/pci: stash dtsm and maxstbl
  s390/pci: add helper function to find device by handle
  s390/pci: get SHM information from list pci
  s390/pci: return status from zpci_refresh_trans
  iommu: introduce iommu_domain_alloc_type and the KVM type
  vfio: introduce KVM-owned IOMMU type
  vfio-pci/zdev: add function handle to clp base capability
  KVM: s390: pci: add basic kvm_zdev structure
  iommu/s390: add support for IOMMU_DOMAIN_KVM
  KVM: s390: pci: do initial setup for AEN interpretation
  KVM: s390: pci: enable host forwarding of Adapter Event Notifications
  KVM: s390: mechanism to enable guest zPCI Interpretation
  KVM: s390: pci: routines for (dis)associating zPCI devices with a KVM
  KVM: s390: pci: provide routines for enabling/disabling interpretation
  KVM: s390: pci: provide routines for enabling/disabling interrupt
forwarding
  KVM: s390: pci: provide routines for enabling/disabling IOAT assist
  KVM: s390: pci: handle refresh of PCI translations
  KVM: s390: intercept the rpcit instruction
  KVM: s390: add KVM_S390_ZPCI_OP to manage guest zPCI devices
  vfio-pci/zdev: add DTSM to clp group capability
  KVM: s390: introduce CPU feature for zPCI Interpretation
  MAINTAINERS: additional files related kvm s390 pci passthrough
  MAINTAINERS: update s390 IOMMU entry

 Documentation/virt/kvm/api.rst   |  60 +++
 MAINTAINERS  |   4 +-
 arch/s390/include/asm/airq.h |   7 +-
 arch/s390/include/asm/kvm_host.h |   7 +
 arch/s390/include/asm/kvm_pci.h  |  40 ++
 arch/s390/include/asm/pci.h  |  12 +
 arch/s390/include/asm/pci_clp.h  |  11 +-
 arch/s390/inc

Re: [PATCH v8 00/11] ACPI/IORT: Support for IORT RMR node

2022-03-14 Thread Lorenzo Pieralisi
On Mon, Mar 14, 2022 at 11:43:51AM +0100, Ard Biesheuvel wrote:
> On Mon, 14 Mar 2022 at 11:37, Eric Auger  wrote:
> >
> > Hi Robin
> >
> > On 3/11/22 11:34 AM, Robin Murphy wrote:
> > > On 2022-03-11 08:19, Eric Auger wrote:
> > >> Hi guys,
> > >>
> > >> On 2/21/22 4:43 PM, Shameer Kolothum wrote:
> > >>> Hi,
> > >>>
> > >>> Since we now have an updated version[0] of the IORT spec (E.d) which
> > >>> addresses the memory attributes issues discussed here [1],
> > >>> this series now make use of it.
> > >>>
> > >>> The pull request for ACPICA E.d related changes are already
> > >>> raised and can be found here,
> > >>> https://github.com/acpica/acpica/pull/752
> > >>>
> > >>> v7 --> v8
> > >>>- Patch #1 has temp definitions for RMR related changes till
> > >>>  the ACPICA header changes are part of kernel.
> > >>>- No early parsing of RMR node info and is only parsed at the
> > >>>  time of use.
> > >>>- Changes to the RMR get/put API format compared to the
> > >>>  previous version.
> > >>>- Support for RMR descriptor shared by multiple stream IDs.
> > >>>
> > >>> Please take a look and let me know your thoughts.
> > >>>
> > >>> Thanks,
> > >>> Shameer
> > >>> [0] https://developer.arm.com/documentation/den0049/ed/
> > >> I still have a question on the IORT E.d spec (unrelated to this series).
> > >>
> > >> The spec mandates that if RMR nodes are presented in the IORT,
> > >> _DSM function #5 for the PCIe host bridge ACPI device object must return
> > >> 0, indicating the OS must honour the PCI config that the FW computed at
> > >> boot time.
> > >>
> > >> However implementing this _DSM #5 as above is known to prevent PCI
> > >> devices with IO ports from working, on aarch64 linux.
> > >>
> > >> "
> > >> The reason is that EFI creates I/O port mappings below
> > >>  0x1000 (in fact, at 0). However Linux, for legacy reasons, does not
> > >>  support I/O ports <= 0x1000 on PCI, so the I/O assignment
> > >> created by EFI
> > >>  is rejected.
> > >>  EFI creates the mappings primarily for itself, and up until
> > >> DSM #5
> > >>  started to be enforced, all PCI resource allocations that
> > >> existed at
> > >>  boot were ignored by Linux and recreated from scratch.
> > >> "
> > >>
> > >> This is an excerpt of a qemu commit message that reverted the _DSM #5
> > >> change (Revert "acpi/gpex: Inform os to keep firmware resource map").
> > >> Has the situation changed since July 2021 (i.e. has UEFI been reworked?).
> > >> [+ Ard]
> > >
> > > FWIW I wasn't aware of that, but if it's an issue then it will need to
> > > be fixed in Linux or UEFI's PCI resource code (arguably if UEFI has
> > > already allocated from the bottom of I/O space then Linux should be
> > > safe to assume that there are no legacy PC I/O resources to worry
> > > about). The DSM is required to prevent bus numbers being reassigned,
> > > because if that happens then any PCI StreamIDs referenced in IORT may
> > > suddenly become meaningless and the association of root complex nodes
> > > and RMRs to physical hardware lost.
> >
> > Thank you for confirming and explaining the need for DSM #5. Ard, please
> > could you confirm that the incompatibility with PCI devices with IO
> > ports is still there?
> >
> 
> Yes, and this needs to be fixed in Linux. The firmware complies with
> the pertinent specifications, and it is Linux that deviates from this
> for legacy reasons.
> 
> IIRC, this came up on the mailing list at some point, and one of the
> issues is that I/O port 0x0 is mistaken for 'no resource' or some
> other exceptional case like that, so even if we fix the arbitrary
> limit of 0x1000, we may still run into trouble when devices use I/O
> port 0x0.

Yes, I need to go back to that thread to sort this out.

Thanks,
Lorenzo


Re: [PATCH v8 00/11] ACPI/IORT: Support for IORT RMR node

2022-03-14 Thread Ard Biesheuvel
On Mon, 14 Mar 2022 at 11:37, Eric Auger  wrote:
>
> Hi Robin
>
> On 3/11/22 11:34 AM, Robin Murphy wrote:
> > On 2022-03-11 08:19, Eric Auger wrote:
> >> Hi guys,
> >>
> >> On 2/21/22 4:43 PM, Shameer Kolothum wrote:
> >>> Hi,
> >>>
> >>> Since we now have an updated version[0] of the IORT spec (E.d) which
> >>> addresses the memory attributes issues discussed here [1],
> >>> this series now make use of it.
> >>>
> >>> The pull request for ACPICA E.d related changes are already
> >>> raised and can be found here,
> >>> https://github.com/acpica/acpica/pull/752
> >>>
> >>> v7 --> v8
> >>>- Patch #1 has temp definitions for RMR related changes till
> >>>  the ACPICA header changes are part of kernel.
> >>>- No early parsing of RMR node info and is only parsed at the
> >>>  time of use.
> >>>- Changes to the RMR get/put API format compared to the
> >>>  previous version.
> >>>- Support for RMR descriptor shared by multiple stream IDs.
> >>>
> >>> Please take a look and let me know your thoughts.
> >>>
> >>> Thanks,
> >>> Shameer
> >>> [0] https://developer.arm.com/documentation/den0049/ed/
> >> I still have a question on the IORT E.d spec (unrelated to this series).
> >>
> >> The spec mandates that if RMR nodes are presented in the IORT,
> >> _DSM function #5 for the PCIe host bridge ACPI device object must return
> >> 0, indicating the OS must honour the PCI config that the FW computed at
> >> boot time.
> >>
> >> However implementing this _DSM #5 as above is known to prevent PCI
> >> devices with IO ports from working, on aarch64 linux.
> >>
> >> "
> >> The reason is that EFI creates I/O port mappings below
> >>  0x1000 (in fact, at 0). However Linux, for legacy reasons, does not
> >>  support I/O ports <= 0x1000 on PCI, so the I/O assignment
> >> created by EFI
> >>  is rejected.
> >>  EFI creates the mappings primarily for itself, and up until
> >> DSM #5
> >>  started to be enforced, all PCI resource allocations that
> >> existed at
> >>  boot were ignored by Linux and recreated from scratch.
> >> "
> >>
> >> This is an excerpt of a qemu commit message that reverted the _DSM #5
> >> change (Revert "acpi/gpex: Inform os to keep firmware resource map").
> >> Has the situation changed since July 2021 (i.e. has UEFI been reworked?).
> >> [+ Ard]
> >
> > FWIW I wasn't aware of that, but if it's an issue then it will need to
> > be fixed in Linux or UEFI's PCI resource code (arguably if UEFI has
> > already allocated from the bottom of I/O space then Linux should be
> > safe to assume that there are no legacy PC I/O resources to worry
> > about). The DSM is required to prevent bus numbers being reassigned,
> > because if that happens then any PCI StreamIDs referenced in IORT may
> > suddenly become meaningless and the association of root complex nodes
> > and RMRs to physical hardware lost.
>
> Thank you for confirming and explaining the need for DSM #5. Ard, please
> could you confirm that the incompatibility with PCI devices with IO
> ports is still there?
>

Yes, and this needs to be fixed in Linux. The firmware complies with
the pertinent specifications, and it is Linux that deviates from this
for legacy reasons.

IIRC, this came up on the mailing list at some point, and one of the
issues is that I/O port 0x0 is mistaken for 'no resource' or some
other exceptional case like that, so even if we fix the arbitrary
limit of 0x1000, we may still run into trouble when devices use I/O
port 0x0.


Re: [PATCH v8 00/11] ACPI/IORT: Support for IORT RMR node

2022-03-14 Thread Eric Auger
Hi Robin

On 3/11/22 11:34 AM, Robin Murphy wrote:
> On 2022-03-11 08:19, Eric Auger wrote:
>> Hi guys,
>>
>> On 2/21/22 4:43 PM, Shameer Kolothum wrote:
>>> Hi,
>>>
>>> Since we now have an updated version[0] of the IORT spec (E.d) which
>>> addresses the memory attributes issues discussed here [1],
>>> this series now make use of it.
>>>
>>> The pull request for ACPICA E.d related changes are already
>>> raised and can be found here,
>>> https://github.com/acpica/acpica/pull/752
>>>
>>> v7 --> v8
>>>    - Patch #1 has temp definitions for RMR related changes till
>>>  the ACPICA header changes are part of kernel.
>>>    - No early parsing of RMR node info and is only parsed at the
>>>  time of use.
>>>    - Changes to the RMR get/put API format compared to the
>>>  previous version.
>>>    - Support for RMR descriptor shared by multiple stream IDs.
>>>
>>> Please take a look and let me know your thoughts.
>>>
>>> Thanks,
>>> Shameer
>>> [0] https://developer.arm.com/documentation/den0049/ed/
>> I still have a question on the IORT E.d spec (unrelated to this series).
>>
>> The spec mandates that if RMR nodes are presented in the IORT,
>> _DSM function #5 for the PCIe host bridge ACPI device object must return
>> 0, indicating the OS must honour the PCI config that the FW computed at
>> boot time.
>>
>> However implementing this _DSM #5 as above is known to prevent PCI
>> devices with IO ports from working, on aarch64 linux.
>>
>> "
> >>  The reason is that EFI creates I/O port mappings below 0x1000
> >>  (in fact, at 0). However Linux, for legacy reasons, does not
> >>  support I/O ports <= 0x1000 on PCI, so the I/O assignment
> >>  created by EFI is rejected.
> >>  EFI creates the mappings primarily for itself, and up until
> >>  DSM #5 started to be enforced, all PCI resource allocations
> >>  that existed at boot were ignored by Linux and recreated from
> >>  scratch.
>> "
>>
> >> This is an excerpt of a qemu commit message that reverted the _DSM #5
> >> change (Revert "acpi/gpex: Inform os to keep firmware resource map").
> >> Has the situation changed since July 2021 (i.e. has UEFI been reworked)?
>> [+ Ard]
>
> FWIW I wasn't aware of that, but if it's an issue then it will need to
> be fixed in Linux or UEFI's PCI resource code (arguably if UEFI has
> already allocated from the bottom of I/O space then Linux should be
> safe to assume that there are no legacy PC I/O resources to worry
> about). The DSM is required to prevent bus numbers being reassigned,
> because if that happens then any PCI StreamIDs referenced in IORT may
> suddenly become meaningless and the association of root complex nodes
> and RMRs to physical hardware lost.

Thank you for confirming and explaining the need for DSM #5. Ard, please
could you confirm that the incompatibility with PCI devices with IO
ports is still there?

Eric

>
> Robin.
>
>> Thank you in advance
>>
>> Regards
>>
>> Eric
>>
>>
>>
>>
>>> [1]
>>> https://lore.kernel.org/linux-acpi/20210805160319.GB23085@lpieralisi/
>>>
>>>  From old:
>>> We have faced issues with 3408iMR RAID controller cards which
>>> fail to boot when SMMU is enabled. This is because these
>>> controllers make use of host memory for various caching related
>>> purposes and when SMMU is enabled the iMR firmware fails to
>>> access these memory regions as there is no mapping for them.
>>> IORT RMR provides a way for UEFI to describe and report these
>>> memory regions so that the kernel can make a unity mapping for
>>> these in SMMU.
>>>
>>> Change History:
>>>
>>> v6 --> v7
>>>   -fix pointed out by Steve to the SMMUv2 SMR bypass install in
>>> patch #8.
>>>
>>> v5 --> v6
>>> - Addressed comments from Robin & Lorenzo.
>>>    : Moved iort_parse_rmr() to acpi_iort_init() from
>>>  iort_init_platform_devices().
>>>    : Removed use of struct iort_rmr_entry during the initial
>>>  parse. Using struct iommu_resv_region instead.
>>>    : Report RMR address alignment and overlap errors, but continue.
>>>    : Reworked arm_smmu_init_bypass_stes() (patch # 6).
>>> - Updated SMMUv2 bypass SMR code. Thanks to Jon N (patch #8).
>>> - Set IOMMU protection flags(IOMMU_CACHE, IOMMU_MMIO) based
>>>    on Type of RMR region. Suggested by Jon N.
>>>
>>> v4 --> v5
>>>   -Added a fw_data union to struct iommu_resv_region and removed
>>>    struct iommu_rmr (Based on comments from Joerg/Robin).
>>>   -Added iommu_put_rmrs() to release mem.
>>>   -Thanks to Steve for verifying on SMMUv2, but not added the Tested-by
>>>    yet because of the above changes.
>>>
>>> v3 -->v4
>>> -Included the SMMUv2 SMR bypass install changes suggested by
>>>   Steve(patch #7)
>>> -As per Robin's comments, RMR reserve implementation is now
>>>   more generic  (patch #8) and dropped v3 patches 8 and 10.
>>> -Rebase to 5.13-rc1
>>>
>>> RFC v2 --> v3
>>>   -Dropped RFC tag as the ACPICA header changes are now ready to be
>>>    part of 5.13[0]. But this series still has a dependency on that
>>> patch.
>>>   -Added

[PATCH 15/15] x86: remove cruft from <asm/dma-mapping.h>

2022-03-14 Thread Christoph Hellwig
<asm/dma-mapping.h> gets pulled in by all drivers using the DMA API.
Remove x86 internal variables and unnecessary includes from it.

Signed-off-by: Christoph Hellwig 
---
 arch/x86/include/asm/dma-mapping.h | 11 ---
 arch/x86/include/asm/iommu.h   |  2 ++
 2 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/dma-mapping.h 
b/arch/x86/include/asm/dma-mapping.h
index 256fd8115223d..1c66708e30623 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -2,17 +2,6 @@
 #ifndef _ASM_X86_DMA_MAPPING_H
 #define _ASM_X86_DMA_MAPPING_H
 
-/*
- * IOMMU interface. See Documentation/core-api/dma-api-howto.rst and
- * Documentation/core-api/dma-api.rst for documentation.
- */
-
-#include 
-#include 
-
-extern int iommu_merge;
-extern int panic_on_overflow;
-
 extern const struct dma_map_ops *dma_ops;
 
 static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index dba89ed40d38d..0bef44d30a278 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -8,6 +8,8 @@
 
 extern int force_iommu, no_iommu;
 extern int iommu_detected;
+extern int iommu_merge;
+extern int panic_on_overflow;
 
 #ifdef CONFIG_SWIOTLB
 extern bool x86_swiotlb_enable;
-- 
2.30.2



[PATCH 14/15] swiotlb: remove swiotlb_init_with_tbl and swiotlb_init_late_with_tbl

2022-03-14 Thread Christoph Hellwig
No users left.

Signed-off-by: Christoph Hellwig 
---
 include/linux/swiotlb.h |  2 -
 kernel/dma/swiotlb.c| 85 +++--
 2 files changed, 30 insertions(+), 57 deletions(-)

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 7b50c82f84ce9..7ed35dd3de6e7 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -34,13 +34,11 @@ struct scatterlist;
 /* default to 64MB */
 #define IO_TLB_DEFAULT_SIZE (64UL<<20)
 
-int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, unsigned int flags);
 unsigned long swiotlb_size_or_default(void);
 void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
int (*remap)(void *tlb, unsigned long nslabs));
 int swiotlb_init_late(size_t size, gfp_t gfp_mask,
int (*remap)(void *tlb, unsigned long nslabs));
-extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs);
 extern void __init swiotlb_update_mem_attributes(void);
 
 phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys,
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 88ea7b9bce6e9..d04bacdb0905b 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -225,33 +225,6 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem 
*mem, phys_addr_t start,
return;
 }
 
-int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs,
-   unsigned int flags)
-{
-   struct io_tlb_mem *mem = &io_tlb_default_mem;
-   size_t alloc_size;
-
-   if (swiotlb_force_disable)
-   return 0;
-
-   /* protect against double initialization */
-   if (WARN_ON_ONCE(mem->nslabs))
-   return -ENOMEM;
-
-   alloc_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), nslabs));
-   mem->slots = memblock_alloc(alloc_size, PAGE_SIZE);
-   if (!mem->slots)
-   panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
- __func__, alloc_size, PAGE_SIZE);
-
-   swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, false);
-   mem->force_bounce = flags & SWIOTLB_FORCE;
-
-   if (flags & SWIOTLB_VERBOSE)
-   swiotlb_print_info();
-   return 0;
-}
-
 /*
  * Statically reserve bounce buffer space and initialize bounce buffer data
  * structures for the software IO TLB used to implement the DMA API.
@@ -259,7 +232,9 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long 
nslabs,
 void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
int (*remap)(void *tlb, unsigned long nslabs))
 {
+   struct io_tlb_mem *mem = &io_tlb_default_mem;
unsigned long nslabs = default_nslabs;
+   size_t alloc_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), nslabs));
size_t bytes;
void *tlb;
 
@@ -280,7 +255,8 @@ void __init swiotlb_init_remap(bool addressing_limit, 
unsigned int flags,
else
tlb = memblock_alloc_low(bytes, PAGE_SIZE);
if (!tlb)
-   goto fail;
+   panic("%s: failed to allocate tlb structure\n", __func__);
+
if (remap && remap(tlb, nslabs) < 0) {
memblock_free(tlb, PAGE_ALIGN(bytes));
 
@@ -291,14 +267,17 @@ void __init swiotlb_init_remap(bool addressing_limit, 
unsigned int flags,
nslabs = max(1024UL, ALIGN(nslabs >> 1, IO_TLB_SEGSIZE));
goto retry;
}
-   if (swiotlb_init_with_tbl(tlb, default_nslabs, flags))
-   goto fail_free_mem;
-   return;
 
-fail_free_mem:
-   memblock_free(tlb, bytes);
-fail:
-   pr_warn("Cannot allocate buffer");
+   mem->slots = memblock_alloc(alloc_size, PAGE_SIZE);
+   if (!mem->slots)
+   panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
+ __func__, alloc_size, PAGE_SIZE);
+
+   swiotlb_init_io_tlb_mem(mem, __pa(tlb), default_nslabs, false);
+   mem->force_bounce = flags & SWIOTLB_FORCE;
+
+   if (flags & SWIOTLB_VERBOSE)
+   swiotlb_print_info();
 }
 
 void __init swiotlb_init(bool addressing_limit, unsigned int flags)
@@ -314,6 +293,7 @@ void __init swiotlb_init(bool addressing_limit, unsigned 
int flags)
 int swiotlb_init_late(size_t size, gfp_t gfp_mask,
int (*remap)(void *tlb, unsigned long nslabs))
 {
+   struct io_tlb_mem *mem = &io_tlb_default_mem;
unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
unsigned long bytes;
unsigned char *vstart = NULL;
@@ -355,33 +335,28 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
(PAGE_SIZE << order) >> 20);
nslabs = SLABS_PER_PAGE << order;
}
-   rc = swiotlb_late_init_with_tbl(vstart, nslabs);
-   if (rc)
-   free_pages((unsigned long)vstart, order);
-
-   return rc;
-}
-
-int
-swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
-{
-   struct io_tlb_mem *mem = &io_tlb_default_mem;
-   

[PATCH 13/15] swiotlb: merge swiotlb-xen initialization into swiotlb

2022-03-14 Thread Christoph Hellwig
Reuse the generic swiotlb initialization for xen-swiotlb.  For ARM/ARM64
this works trivially, while for x86 xen_swiotlb_fixup needs to be passed
as the remap argument to swiotlb_init_remap/swiotlb_init_late.
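
As a quick illustration, the two entry points after this patch (both
taken from the x86 hunks below) are:

    /* boot-time setup for the initial domain: */
    swiotlb_init_remap(true, x86_swiotlb_flags, xen_swiotlb_fixup);

    /* late setup, only if the default swiotlb isn't up yet: */
    if (!io_tlb_default_mem.nslabs)
            rc = swiotlb_init_late(swiotlb_size_or_default(),
                                   GFP_KERNEL, xen_swiotlb_fixup);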

Signed-off-by: Christoph Hellwig 
---
 arch/arm/xen/mm.c   |  21 +++---
 arch/x86/include/asm/xen/page.h |   5 --
 arch/x86/kernel/pci-dma.c   |  19 +++--
 drivers/xen/swiotlb-xen.c   | 128 +---
 include/xen/arm/page.h  |   1 -
 include/xen/swiotlb-xen.h   |   8 +-
 6 files changed, 27 insertions(+), 155 deletions(-)

diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 28c2070602535..ff05a7899cb86 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -23,22 +23,20 @@
 #include 
 #include 
 
-unsigned long xen_get_swiotlb_free_pages(unsigned int order)
+static gfp_t xen_swiotlb_gfp(void)
 {
phys_addr_t base;
-   gfp_t flags = __GFP_NOWARN|__GFP_KSWAPD_RECLAIM;
u64 i;
 
for_each_mem_range(i, &base, NULL) {
if (base < (phys_addr_t)0x) {
if (IS_ENABLED(CONFIG_ZONE_DMA32))
-   flags |= __GFP_DMA32;
-   else
-   flags |= __GFP_DMA;
-   break;
+   return __GFP_DMA32;
+   return __GFP_DMA;
}
}
-   return __get_free_pages(flags, order);
+
+   return GFP_KERNEL;
 }
 
 static bool hypercall_cflush = false;
@@ -140,10 +138,13 @@ static int __init xen_mm_init(void)
if (!xen_swiotlb_detect())
return 0;
 
-   rc = xen_swiotlb_init();
/* we can work with the default swiotlb */
-   if (rc < 0 && rc != -EEXIST)
-   return rc;
+   if (!io_tlb_default_mem.nslabs) {
+   rc = swiotlb_init_late(swiotlb_size_or_default(),
+  xen_swiotlb_gfp(), NULL);
+   if (rc < 0)
+   return rc;
+   }
 
cflush.op = 0;
cflush.a.dev_bus_addr = 0;
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index e989bc2269f54..1fc67df500145 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -357,9 +357,4 @@ static inline bool xen_arch_need_swiotlb(struct device *dev,
return false;
 }
 
-static inline unsigned long xen_get_swiotlb_free_pages(unsigned int order)
-{
-   return __get_free_pages(__GFP_NOWARN, order);
-}
-
 #endif /* _ASM_X86_XEN_PAGE_H */
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index a705a199bf8a3..dbb7b83fc3e48 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -72,15 +72,12 @@ static inline void __init pci_swiotlb_detect(void)
 #endif /* CONFIG_SWIOTLB */
 
 #ifdef CONFIG_SWIOTLB_XEN
-static bool xen_swiotlb;
-
 static void __init pci_xen_swiotlb_init(void)
 {
if (!xen_initial_domain() && !x86_swiotlb_enable)
return;
x86_swiotlb_enable = true;
-   xen_swiotlb = true;
-   xen_swiotlb_init_early();
+   swiotlb_init_remap(true, x86_swiotlb_flags, xen_swiotlb_fixup);
dma_ops = &xen_swiotlb_dma_ops;
if (IS_ENABLED(CONFIG_PCI))
pci_request_acs();
@@ -88,14 +85,16 @@ static void __init pci_xen_swiotlb_init(void)
 
 int pci_xen_swiotlb_init_late(void)
 {
-   int rc;
-
-   if (xen_swiotlb)
+   if (dma_ops == &xen_swiotlb_dma_ops)
return 0;
 
-   rc = xen_swiotlb_init();
-   if (rc)
-   return rc;
+   /* we can work with the default swiotlb */
+   if (!io_tlb_default_mem.nslabs) {
+   int rc = swiotlb_init_late(swiotlb_size_or_default(),
+  GFP_KERNEL, xen_swiotlb_fixup);
+   if (rc < 0)
+   return rc;
+   }
 
/* XXX: this switches the dma ops under live devices! */
dma_ops = &xen_swiotlb_dma_ops;
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index c2da3eb4826e8..df8085b50df10 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -104,7 +104,7 @@ static int is_xen_swiotlb_buffer(struct device *dev, 
dma_addr_t dma_addr)
return 0;
 }
 
-static int xen_swiotlb_fixup(void *buf, unsigned long nslabs)
+int xen_swiotlb_fixup(void *buf, unsigned long nslabs)
 {
int rc;
unsigned int order = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT);
@@ -130,132 +130,6 @@ static int xen_swiotlb_fixup(void *buf, unsigned long 
nslabs)
return 0;
 }
 
-enum xen_swiotlb_err {
-   XEN_SWIOTLB_UNKNOWN = 0,
-   XEN_SWIOTLB_ENOMEM,
-   XEN_SWIOTLB_EFIXUP
-};
-
-static const char *xen_swiotlb_error(enum xen_swiotlb_err err)
-{
-   switch (err) {
-   case XEN_SWIOTLB_ENOMEM:
-   return "Cannot allocate Xen-SWIOTLB buffer\n";
-   case XEN_SWIOTLB_EFIXUP:

[PATCH 12/15] swiotlb: provide swiotlb_init variants that remap the buffer

2022-03-14 Thread Christoph Hellwig
To share more code between swiotlb and xen-swiotlb, offer a
swiotlb_init_remap interface and add a remap callback to
swiotlb_init_late that will allow Xen to remap the buffer
without duplicating much of the logic.
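
For reference, the shape of the new hook: the callback gets the freshly
allocated buffer and may fail, in which case the core frees the buffer,
halves nslabs and retries.  A minimal sketch, where my_remap is a
made-up example callback and not part of the patch:

    static int my_remap(void *tlb, unsigned long nslabs)
    {
            /* e.g. exchange the pages with the hypervisor here;
             * returning < 0 makes swiotlb shrink the buffer and retry */
            return 0;
    }

    swiotlb_init_remap(true, SWIOTLB_VERBOSE, my_remap);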

Signed-off-by: Christoph Hellwig 
---
 arch/x86/pci/sta2x11-fixup.c |  2 +-
 include/linux/swiotlb.h  |  5 -
 kernel/dma/swiotlb.c | 38 +---
 3 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index c7e6faf59a861..7368afc039987 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -57,7 +57,7 @@ static void sta2x11_new_instance(struct pci_dev *pdev)
int size = STA2X11_SWIOTLB_SIZE;
/* First instance: register your own swiotlb area */
dev_info(&pdev->dev, "Using SWIOTLB (size %i)\n", size);
-   if (swiotlb_init_late(size, GFP_DMA))
+   if (swiotlb_init_late(size, GFP_DMA, NULL))
dev_emerg(&pdev->dev, "init swiotlb failed\n");
}
list_add(&instance->list, &sta2x11_instance_list);
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index ee655f2e4d28b..7b50c82f84ce9 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -36,8 +36,11 @@ struct scatterlist;
 
 int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, unsigned int flags);
 unsigned long swiotlb_size_or_default(void);
+void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
+   int (*remap)(void *tlb, unsigned long nslabs));
+int swiotlb_init_late(size_t size, gfp_t gfp_mask,
+   int (*remap)(void *tlb, unsigned long nslabs));
 extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs);
-int swiotlb_init_late(size_t size, gfp_t gfp_mask);
 extern void __init swiotlb_update_mem_attributes(void);
 
 phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys,
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 79641c446d284..88ea7b9bce6e9 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -256,9 +256,11 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long 
nslabs,
  * Statically reserve bounce buffer space and initialize bounce buffer data
  * structures for the software IO TLB used to implement the DMA API.
  */
-void __init swiotlb_init(bool addressing_limit, unsigned int flags)
+void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
+   int (*remap)(void *tlb, unsigned long nslabs))
 {
-   size_t bytes = PAGE_ALIGN(default_nslabs << IO_TLB_SHIFT);
+   unsigned long nslabs = default_nslabs;
+   size_t bytes;
void *tlb;
 
if (!addressing_limit && !swiotlb_force_bounce)
@@ -271,12 +273,24 @@ void __init swiotlb_init(bool addressing_limit, unsigned 
int flags)
 * allow to pick a location everywhere for hypervisors with guest
 * memory encryption.
 */
+retry:
+   bytes = PAGE_ALIGN(default_nslabs << IO_TLB_SHIFT);
if (flags & SWIOTLB_ANY)
tlb = memblock_alloc(bytes, PAGE_SIZE);
else
tlb = memblock_alloc_low(bytes, PAGE_SIZE);
if (!tlb)
goto fail;
+   if (remap && remap(tlb, nslabs) < 0) {
+   memblock_free(tlb, PAGE_ALIGN(bytes));
+
+   /* Min is 2MB */
+   if (nslabs <= 1024)
+   panic("%s: Failed to remap %zu bytes\n",
+ __func__, bytes);
+   nslabs = max(1024UL, ALIGN(nslabs >> 1, IO_TLB_SEGSIZE));
+   goto retry;
+   }
if (swiotlb_init_with_tbl(tlb, default_nslabs, flags))
goto fail_free_mem;
return;
@@ -287,12 +301,18 @@ void __init swiotlb_init(bool addressing_limit, unsigned 
int flags)
pr_warn("Cannot allocate buffer");
 }
 
+void __init swiotlb_init(bool addressing_limit, unsigned int flags)
+{
+   return swiotlb_init_remap(addressing_limit, flags, NULL);
+}
+
 /*
  * Systems with larger DMA zones (those that don't support ISA) can
  * initialize the swiotlb later using the slab allocator if needed.
  * This should be just like above, but with some error catching.
  */
-int swiotlb_init_late(size_t size, gfp_t gfp_mask)
+int swiotlb_init_late(size_t size, gfp_t gfp_mask,
+   int (*remap)(void *tlb, unsigned long nslabs))
 {
unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
unsigned long bytes;
@@ -303,6 +323,7 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask)
if (swiotlb_force_disable)
return 0;
 
+retry:
order = get_order(nslabs << IO_TLB_SHIFT);
nslabs = SLABS_PER_PAGE << order;
bytes = nslabs << IO_TLB_SHIFT;
@@ -317,6 +338,17 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask)
 
if (!vstart)
return -ENOMEM;
+   if 

[PATCH 11/15] swiotlb: pass a gfp_mask argument to swiotlb_init_late

2022-03-14 Thread Christoph Hellwig
Let the caller choose a zone to allocate from.  This will be used
later on by the xen-swiotlb initialization on arm.
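
For example, a hypothetical caller that only needs the buffer below 4G
could now do (two-argument form as of this patch; a remap argument is
only added later in the series):

    if (swiotlb_init_late(64 << 20, GFP_DMA32))
            pr_warn("swiotlb late init failed\n");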

Signed-off-by: Christoph Hellwig 
Reviewed-by: Anshuman Khandual 
---
 arch/x86/pci/sta2x11-fixup.c | 2 +-
 include/linux/swiotlb.h  | 2 +-
 kernel/dma/swiotlb.c | 7 ++-
 3 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index e0c039a75b2db..c7e6faf59a861 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -57,7 +57,7 @@ static void sta2x11_new_instance(struct pci_dev *pdev)
int size = STA2X11_SWIOTLB_SIZE;
/* First instance: register your own swiotlb area */
dev_info(&pdev->dev, "Using SWIOTLB (size %i)\n", size);
-   if (swiotlb_init_late(size))
+   if (swiotlb_init_late(size, GFP_DMA))
dev_emerg(&pdev->dev, "init swiotlb failed\n");
}
list_add(&instance->list, &sta2x11_instance_list);
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index eabdd89987027..ee655f2e4d28b 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -37,7 +37,7 @@ struct scatterlist;
 int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, unsigned int flags);
 unsigned long swiotlb_size_or_default(void);
 extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs);
-int swiotlb_init_late(size_t size);
+int swiotlb_init_late(size_t size, gfp_t gfp_mask);
 extern void __init swiotlb_update_mem_attributes(void);
 
 phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys,
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 2ad12562c94fe..79641c446d284 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -292,7 +292,7 @@ void __init swiotlb_init(bool addressing_limit, unsigned 
int flags)
  * initialize the swiotlb later using the slab allocator if needed.
  * This should be just like above, but with some error catching.
  */
-int swiotlb_init_late(size_t size)
+int swiotlb_init_late(size_t size, gfp_t gfp_mask)
 {
unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
unsigned long bytes;
@@ -303,15 +303,12 @@ int swiotlb_init_late(size_t size)
if (swiotlb_force_disable)
return 0;
 
-   /*
-* Get IO TLB memory from the low pages
-*/
order = get_order(nslabs << IO_TLB_SHIFT);
nslabs = SLABS_PER_PAGE << order;
bytes = nslabs << IO_TLB_SHIFT;
 
while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
-   vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
+   vstart = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
  order);
if (vstart)
break;
-- 
2.30.2



[PATCH 10/15] swiotlb: add a SWIOTLB_ANY flag to lift the low memory restriction

2022-03-14 Thread Christoph Hellwig
Power SVM wants to allocate a swiotlb buffer that is not restricted to
low memory for the trusted hypervisor scheme.  Consolidate the support
for this into the swiotlb_init interface by adding a new flag.
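
With the flag in place, the pseries SVM setup reduces to the two lines
visible in the hunks below:

    ppc_swiotlb_flags |= SWIOTLB_ANY | SWIOTLB_FORCE;     /* init_svm() */
    swiotlb_init(ppc_swiotlb_enable, ppc_swiotlb_flags);  /* mem_init() */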

Signed-off-by: Christoph Hellwig 
---
 arch/powerpc/include/asm/svm.h   |  4 
 arch/powerpc/include/asm/swiotlb.h   |  1 +
 arch/powerpc/kernel/dma-swiotlb.c|  1 +
 arch/powerpc/mm/mem.c|  5 +
 arch/powerpc/platforms/pseries/svm.c | 26 +-
 include/linux/swiotlb.h  |  1 +
 kernel/dma/swiotlb.c | 11 +--
 7 files changed, 14 insertions(+), 35 deletions(-)

diff --git a/arch/powerpc/include/asm/svm.h b/arch/powerpc/include/asm/svm.h
index 7546402d796af..85580b30aba48 100644
--- a/arch/powerpc/include/asm/svm.h
+++ b/arch/powerpc/include/asm/svm.h
@@ -15,8 +15,6 @@ static inline bool is_secure_guest(void)
return mfmsr() & MSR_S;
 }
 
-void __init svm_swiotlb_init(void);
-
 void dtl_cache_ctor(void *addr);
 #define get_dtl_cache_ctor()   (is_secure_guest() ? dtl_cache_ctor : NULL)
 
@@ -27,8 +25,6 @@ static inline bool is_secure_guest(void)
return false;
 }
 
-static inline void svm_swiotlb_init(void) {}
-
 #define get_dtl_cache_ctor() NULL
 
 #endif /* CONFIG_PPC_SVM */
diff --git a/arch/powerpc/include/asm/swiotlb.h 
b/arch/powerpc/include/asm/swiotlb.h
index 3c1a1cd161286..4203b5e0a88ed 100644
--- a/arch/powerpc/include/asm/swiotlb.h
+++ b/arch/powerpc/include/asm/swiotlb.h
@@ -9,6 +9,7 @@
 #include 
 
 extern unsigned int ppc_swiotlb_enable;
+extern unsigned int ppc_swiotlb_flags;
 
 #ifdef CONFIG_SWIOTLB
 void swiotlb_detect_4g(void);
diff --git a/arch/powerpc/kernel/dma-swiotlb.c 
b/arch/powerpc/kernel/dma-swiotlb.c
index fc7816126a401..ba256c37bcc0f 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -10,6 +10,7 @@
 #include 
 
 unsigned int ppc_swiotlb_enable;
+unsigned int ppc_swiotlb_flags;
 
 void __init swiotlb_detect_4g(void)
 {
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index e1519e2edc656..a4d65418c30a9 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -249,10 +249,7 @@ void __init mem_init(void)
 * back to to-down.
 */
memblock_set_bottom_up(true);
-   if (is_secure_guest())
-   svm_swiotlb_init();
-   else
-   swiotlb_init(ppc_swiotlb_enable, 0);
+   swiotlb_init(ppc_swiotlb_enable, ppc_swiotlb_flags);
 #endif
 
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
diff --git a/arch/powerpc/platforms/pseries/svm.c 
b/arch/powerpc/platforms/pseries/svm.c
index c5228f4969eb2..3b4045d508ec8 100644
--- a/arch/powerpc/platforms/pseries/svm.c
+++ b/arch/powerpc/platforms/pseries/svm.c
@@ -28,7 +28,7 @@ static int __init init_svm(void)
 * need to use the SWIOTLB buffer for DMA even if dma_capable() says
 * otherwise.
 */
-   swiotlb_force = SWIOTLB_FORCE;
+   ppc_swiotlb_flags |= SWIOTLB_ANY | SWIOTLB_FORCE;
 
/* Share the SWIOTLB buffer with the host. */
swiotlb_update_mem_attributes();
@@ -37,30 +37,6 @@ static int __init init_svm(void)
 }
 machine_early_initcall(pseries, init_svm);
 
-/*
- * Initialize SWIOTLB. Essentially the same as swiotlb_init(), except that it
- * can allocate the buffer anywhere in memory. Since the hypervisor doesn't 
have
- * any addressing limitation, we don't need to allocate it in low addresses.
- */
-void __init svm_swiotlb_init(void)
-{
-   unsigned char *vstart;
-   unsigned long bytes, io_tlb_nslabs;
-
-   io_tlb_nslabs = (swiotlb_size_or_default() >> IO_TLB_SHIFT);
-   io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
-
-   bytes = io_tlb_nslabs << IO_TLB_SHIFT;
-
-   vstart = memblock_alloc(PAGE_ALIGN(bytes), PAGE_SIZE);
-   if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, false))
-   return;
-
-
-   memblock_free(vstart, PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
-   panic("SVM: Cannot allocate SWIOTLB buffer");
-}
-
 int set_memory_encrypted(unsigned long addr, int numpages)
 {
if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index ae0407173e845..eabdd89987027 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -15,6 +15,7 @@ struct scatterlist;
 
 #define SWIOTLB_VERBOSE(1 << 0) /* verbose initialization */
 #define SWIOTLB_FORCE  (1 << 1) /* force bounce buffering */
+#define SWIOTLB_ANY(1 << 2) /* allow any memory for the buffer */
 
 /*
  * Maximum allowable number of contiguous slabs to map,
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 5ac6e128d4279..2ad12562c94fe 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -266,8 +266,15 @@ void __init swiotlb_init(bool addressing_limit, unsigned 
int flags)
if (swiotlb_force_disable)
return;
 
-   /* Get IO TLB m

[PATCH 09/15] swiotlb: make the swiotlb_init interface more useful

2022-03-14 Thread Christoph Hellwig
Pass a bool to indicate whether swiotlb needs to be enabled based on
the addressing needs and replace the verbose argument with a set of
flags, including one to force enable bounce buffering.

Note that this patch removes the possibility to force xen-swiotlb
use with swiotlb=force on the command line on x86 (arm and arm64
never supported that), but this interface will be restored shortly.
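
The interface change in a nutshell, as seen in the arch hunks below:

    -       swiotlb_init(1);                     /* old: int verbose */
    +       swiotlb_init(true, SWIOTLB_VERBOSE); /* new: bool + flags */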

Signed-off-by: Christoph Hellwig 
---
 arch/arm/mm/init.c |  6 +
 arch/arm64/mm/init.c   |  6 +
 arch/ia64/mm/init.c|  4 +--
 arch/mips/cavium-octeon/dma-octeon.c   |  2 +-
 arch/mips/loongson64/dma.c |  2 +-
 arch/mips/sibyte/common/dma.c  |  2 +-
 arch/powerpc/mm/mem.c  |  3 ++-
 arch/powerpc/platforms/pseries/setup.c |  3 ---
 arch/riscv/mm/init.c   |  8 +-
 arch/s390/mm/init.c|  3 +--
 arch/x86/kernel/pci-dma.c  | 15 ++-
 drivers/xen/swiotlb-xen.c  |  4 +--
 include/linux/swiotlb.h| 15 ++-
 include/trace/events/swiotlb.h | 29 -
 kernel/dma/swiotlb.c   | 35 ++
 15 files changed, 55 insertions(+), 82 deletions(-)

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 6d0cb0f7bc54b..73f30d278b565 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -312,11 +312,7 @@ static void __init free_highpages(void)
 void __init mem_init(void)
 {
 #ifdef CONFIG_ARM_LPAE
-   if (swiotlb_force == SWIOTLB_FORCE ||
-   max_pfn > arm_dma_pfn_limit)
-   swiotlb_init(1);
-   else
-   swiotlb_force = SWIOTLB_NO_FORCE;
+   swiotlb_init(max_pfn > arm_dma_pfn_limit, SWIOTLB_VERBOSE);
 #endif
 
set_max_mapnr(pfn_to_page(max_pfn) - mem_map);
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index db63cc885771a..52102adda3d28 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -373,11 +373,7 @@ void __init bootmem_init(void)
  */
 void __init mem_init(void)
 {
-   if (swiotlb_force == SWIOTLB_FORCE ||
-   max_pfn > PFN_DOWN(arm64_dma_phys_limit))
-   swiotlb_init(1);
-   else if (!xen_swiotlb_detect())
-   swiotlb_force = SWIOTLB_NO_FORCE;
+   swiotlb_init(max_pfn > PFN_DOWN(arm64_dma_phys_limit), SWIOTLB_VERBOSE);
 
/* this will put all unused low memory onto the freelists */
memblock_free_all();
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 5d165607bf354..3c3e15b22608f 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -437,9 +437,7 @@ mem_init (void)
if (iommu_detected)
break;
 #endif
-#ifdef CONFIG_SWIOTLB
-   swiotlb_init(1);
-#endif
+   swiotlb_init(true, SWIOTLB_VERBOSE);
} while (0);
 
 #ifdef CONFIG_FLATMEM
diff --git a/arch/mips/cavium-octeon/dma-octeon.c 
b/arch/mips/cavium-octeon/dma-octeon.c
index fb7547e217263..9fbba6a8fa4c5 100644
--- a/arch/mips/cavium-octeon/dma-octeon.c
+++ b/arch/mips/cavium-octeon/dma-octeon.c
@@ -235,5 +235,5 @@ void __init plat_swiotlb_setup(void)
 #endif
 
swiotlb_adjust_size(swiotlbsize);
-   swiotlb_init(1);
+   swiotlb_init(true, SWIOTLB_VERBOSE);
 }
diff --git a/arch/mips/loongson64/dma.c b/arch/mips/loongson64/dma.c
index 364f2f27c8723..8220a1bc0db64 100644
--- a/arch/mips/loongson64/dma.c
+++ b/arch/mips/loongson64/dma.c
@@ -24,5 +24,5 @@ phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
 
 void __init plat_swiotlb_setup(void)
 {
-   swiotlb_init(1);
+   swiotlb_init(true, SWIOTLB_VERBOSE);
 }
diff --git a/arch/mips/sibyte/common/dma.c b/arch/mips/sibyte/common/dma.c
index eb47a94f3583e..c5c2c782aff68 100644
--- a/arch/mips/sibyte/common/dma.c
+++ b/arch/mips/sibyte/common/dma.c
@@ -10,5 +10,5 @@
 
 void __init plat_swiotlb_setup(void)
 {
-   swiotlb_init(1);
+   swiotlb_init(true, SWIOTLB_VERBOSE);
 }
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 8e301cd8925b2..e1519e2edc656 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -17,6 +17,7 @@
 #include 
 #include 
 
+#include 
 #include 
 #include 
 #include 
@@ -251,7 +252,7 @@ void __init mem_init(void)
if (is_secure_guest())
svm_swiotlb_init();
else
-   swiotlb_init(0);
+   swiotlb_init(ppc_swiotlb_enable, 0);
 #endif
 
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
diff --git a/arch/powerpc/platforms/pseries/setup.c 
b/arch/powerpc/platforms/pseries/setup.c
index 83a04d967a59f..45d637ab58261 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -838,9 +838,6 @@ static void __init pSeries_setup_arch(void)
}
 
ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare;
-
-   if (swiotlb_force == SWIOTLB_FORCE)
-   ppc_swiotlb_enable 

[PATCH 07/15] x86: remove the IOMMU table infrastructure

2022-03-14 Thread Christoph Hellwig
The IOMMU table tries to separate the different IOMMUs into different
backends, but actually requires various cross calls.

Rewrite the code to do the generic swiotlb/swiotlb-xen setup directly
in pci-dma.c and then just call into the IOMMU drivers.
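
The resulting flow is roughly the following (a sketch only -- the
actual pci_iommu_alloc() in this patch differs in detail, e.g. in its
Xen PV handling):

    void __init pci_iommu_alloc(void)
    {
            /* detect steps that used to be dispatched through the
             * IOMMU table are now plain function calls */
            pci_swiotlb_detect();
            gart_iommu_hole_init();
            amd_iommu_detect();
            detect_intel_iommu();
            if (x86_swiotlb_enable)
                    swiotlb_init(0);    /* signature as of this patch */
    }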

Signed-off-by: Christoph Hellwig 
---
 arch/ia64/include/asm/iommu_table.h|   7 --
 arch/x86/include/asm/dma-mapping.h |   1 -
 arch/x86/include/asm/gart.h|   5 +-
 arch/x86/include/asm/iommu.h   |   6 ++
 arch/x86/include/asm/iommu_table.h | 102 ---
 arch/x86/include/asm/swiotlb.h |  30 ---
 arch/x86/include/asm/xen/swiotlb-xen.h |   2 -
 arch/x86/kernel/Makefile   |   2 -
 arch/x86/kernel/amd_gart_64.c  |   5 +-
 arch/x86/kernel/aperture_64.c  |  14 ++--
 arch/x86/kernel/pci-dma.c  | 107 -
 arch/x86/kernel/pci-iommu_table.c  |  77 --
 arch/x86/kernel/pci-swiotlb.c  |  77 --
 arch/x86/kernel/tboot.c|   1 -
 arch/x86/kernel/vmlinux.lds.S  |  12 ---
 arch/x86/xen/Makefile  |   2 -
 arch/x86/xen/pci-swiotlb-xen.c |  96 --
 drivers/iommu/amd/init.c   |   6 --
 drivers/iommu/amd/iommu.c  |   5 +-
 drivers/iommu/intel/dmar.c |   6 +-
 include/linux/dmar.h   |   6 +-
 21 files changed, 110 insertions(+), 459 deletions(-)
 delete mode 100644 arch/ia64/include/asm/iommu_table.h
 delete mode 100644 arch/x86/include/asm/iommu_table.h
 delete mode 100644 arch/x86/include/asm/swiotlb.h
 delete mode 100644 arch/x86/kernel/pci-iommu_table.c
 delete mode 100644 arch/x86/kernel/pci-swiotlb.c
 delete mode 100644 arch/x86/xen/pci-swiotlb-xen.c

diff --git a/arch/ia64/include/asm/iommu_table.h 
b/arch/ia64/include/asm/iommu_table.h
deleted file mode 100644
index cc96116ac276a..0
--- a/arch/ia64/include/asm/iommu_table.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_IA64_IOMMU_TABLE_H
-#define _ASM_IA64_IOMMU_TABLE_H
-
-#define IOMMU_INIT_POST(_detect)
-
-#endif /* _ASM_IA64_IOMMU_TABLE_H */
diff --git a/arch/x86/include/asm/dma-mapping.h 
b/arch/x86/include/asm/dma-mapping.h
index bb1654fe0ce74..256fd8115223d 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -9,7 +9,6 @@
 
 #include 
 #include 
-#include 
 
 extern int iommu_merge;
 extern int panic_on_overflow;
diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h
index 3185565743459..5af8088a10df6 100644
--- a/arch/x86/include/asm/gart.h
+++ b/arch/x86/include/asm/gart.h
@@ -38,7 +38,7 @@ extern int gart_iommu_aperture_disabled;
 extern void early_gart_iommu_check(void);
 extern int gart_iommu_init(void);
 extern void __init gart_parse_options(char *);
-extern int gart_iommu_hole_init(void);
+void gart_iommu_hole_init(void);
 
 #else
 #define gart_iommu_aperture0
@@ -51,9 +51,8 @@ static inline void early_gart_iommu_check(void)
 static inline void gart_parse_options(char *options)
 {
 }
-static inline int gart_iommu_hole_init(void)
+static inline void gart_iommu_hole_init(void)
 {
-   return -ENODEV;
 }
 #endif
 
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index bf1ed2ddc74bd..dba89ed40d38d 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -9,6 +9,12 @@
 extern int force_iommu, no_iommu;
 extern int iommu_detected;
 
+#ifdef CONFIG_SWIOTLB
+extern bool x86_swiotlb_enable;
+#else
+#define x86_swiotlb_enable false
+#endif
+
 /* 10 seconds */
 #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
 
diff --git a/arch/x86/include/asm/iommu_table.h 
b/arch/x86/include/asm/iommu_table.h
deleted file mode 100644
index 1fb3fd1a83c25..0
--- a/arch/x86/include/asm/iommu_table.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_IOMMU_TABLE_H
-#define _ASM_X86_IOMMU_TABLE_H
-
-#include 
-
-/*
- * History lesson:
- * The execution chain of IOMMUs in 2.6.36 looks as so:
- *
- *            [xen-swiotlb]
- *                 |
- *        +----[swiotlb *]--+
- *       /         |         \
- *      /          |          \
- *   [GART]    [Calgary]   [Intel VT-d]
- *    /
- *   /
- * [AMD-Vi]
- *
- * *: if SWIOTLB detected 'iommu=soft'/'swiotlb=force' it would skip
- * over the rest of IOMMUs and unconditionally initialize the SWIOTLB.
- * Also it would surreptitiously initialize set the swiotlb=1 if there were
- * more than 4GB and if the user did not pass in 'iommu=off'. The swiotlb
- * flag would be turned off by all IOMMUs except the Calgary one.
- *
- * The IOMMU_INIT* macros allow a similar tree (or more complex if desired)
- * to be built by defining who we depend on.
- *
- * And all that needs to be done is to use one of the macros in the IOMMU
- * and the pci-dma.c will take care of the rest.
- */
-
-struct 

[PATCH 08/15] x86: centralize setting SWIOTLB_FORCE when guest memory encryption is enabled

2022-03-14 Thread Christoph Hellwig
Move enabling SWIOTLB_FORCE for guest memory encryption into common code.

Signed-off-by: Christoph Hellwig 
---
 arch/x86/kernel/cpu/mshyperv.c | 8 
 arch/x86/kernel/pci-dma.c  | 8 
 arch/x86/mm/mem_encrypt_amd.c  | 3 ---
 3 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 5a99f993e6392..568274917f1cd 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -336,14 +336,6 @@ static void __init ms_hyperv_init_platform(void)
swiotlb_unencrypted_base = 
ms_hyperv.shared_gpa_boundary;
 #endif
}
-
-#ifdef CONFIG_SWIOTLB
-   /*
-* Enable swiotlb force mode in Isolation VM to
-* use swiotlb bounce buffer for dma transaction.
-*/
-   swiotlb_force = SWIOTLB_FORCE;
-#endif
}
 
if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) {
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index df96926421be0..04140e20ef1a3 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -53,6 +53,14 @@ static void __init pci_swiotlb_detect(void)
if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
x86_swiotlb_enable = true;
 
+   /*
+* Guest with guest memory encryption currently perform all DMA through
+* bounce buffers as the hypervisor can't access arbitrary VM memory
+* that is not explicitly shared with it.
+*/
+   if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
+   swiotlb_force = SWIOTLB_FORCE;
+
if (swiotlb_force == SWIOTLB_FORCE)
x86_swiotlb_enable = true;
 }
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index 2b2d018ea3450..a72942d569cf9 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -191,9 +191,6 @@ void __init sme_early_init(void)
/* Update the protection map with memory encryption mask */
for (i = 0; i < ARRAY_SIZE(protection_map); i++)
protection_map[i] = pgprot_encrypted(protection_map[i]);
-
-   if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
-   swiotlb_force = SWIOTLB_FORCE;
 }
 
 void __init sev_setup_arch(void)
-- 
2.30.2



[PATCH 06/15] MIPS/octeon: use swiotlb_init instead of open coding it

2022-03-14 Thread Christoph Hellwig
Use the generic swiotlb initialization helper instead of open coding it.
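
The essence of the change, visible in the hunk below: drop the manual
memblock allocation and swiotlb_init_with_tbl() call and instead do

    swiotlb_adjust_size(swiotlbsize);
    swiotlb_init(1);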

Signed-off-by: Christoph Hellwig 
Acked-by: Thomas Bogendoerfer 
---
 arch/mips/cavium-octeon/dma-octeon.c | 15 ++-
 arch/mips/pci/pci-octeon.c   |  2 +-
 2 files changed, 3 insertions(+), 14 deletions(-)

diff --git a/arch/mips/cavium-octeon/dma-octeon.c 
b/arch/mips/cavium-octeon/dma-octeon.c
index df70308db0e69..fb7547e217263 100644
--- a/arch/mips/cavium-octeon/dma-octeon.c
+++ b/arch/mips/cavium-octeon/dma-octeon.c
@@ -186,15 +186,12 @@ phys_addr_t dma_to_phys(struct device *dev, dma_addr_t 
daddr)
return daddr;
 }
 
-char *octeon_swiotlb;
-
 void __init plat_swiotlb_setup(void)
 {
phys_addr_t start, end;
phys_addr_t max_addr;
phys_addr_t addr_size;
size_t swiotlbsize;
-   unsigned long swiotlb_nslabs;
u64 i;
 
max_addr = 0;
@@ -236,15 +233,7 @@ void __init plat_swiotlb_setup(void)
if (OCTEON_IS_OCTEON2() && max_addr >= 0x1ul)
swiotlbsize = 64 * (1<<20);
 #endif
-   swiotlb_nslabs = swiotlbsize >> IO_TLB_SHIFT;
-   swiotlb_nslabs = ALIGN(swiotlb_nslabs, IO_TLB_SEGSIZE);
-   swiotlbsize = swiotlb_nslabs << IO_TLB_SHIFT;
-
-   octeon_swiotlb = memblock_alloc_low(swiotlbsize, PAGE_SIZE);
-   if (!octeon_swiotlb)
-   panic("%s: Failed to allocate %zu bytes align=%lx\n",
- __func__, swiotlbsize, PAGE_SIZE);
 
-   if (swiotlb_init_with_tbl(octeon_swiotlb, swiotlb_nslabs, 1) == -ENOMEM)
-   panic("Cannot allocate SWIOTLB buffer");
+   swiotlb_adjust_size(swiotlbsize);
+   swiotlb_init(1);
 }
diff --git a/arch/mips/pci/pci-octeon.c b/arch/mips/pci/pci-octeon.c
index fc29b85cfa926..e457a18cbdc59 100644
--- a/arch/mips/pci/pci-octeon.c
+++ b/arch/mips/pci/pci-octeon.c
@@ -664,7 +664,7 @@ static int __init octeon_pci_setup(void)
 
/* BAR1 movable regions contiguous to cover the swiotlb */
octeon_bar1_pci_phys =
-   virt_to_phys(octeon_swiotlb) & ~((1ull << 22) - 1);
+   io_tlb_default_mem.start & ~((1ull << 22) - 1);
 
for (index = 0; index < 32; index++) {
union cvmx_pci_bar1_indexx bar1_index;
-- 
2.30.2



[PATCH 05/15] arm/xen: don't check for xen_initial_domain() in xen_create_contiguous_region

2022-03-14 Thread Christoph Hellwig
From: Stefano Stabellini 

It used to be that Linux enabled swiotlb-xen when running a dom0 on ARM.
Since f5079a9a2a31 "xen/arm: introduce XENFEAT_direct_mapped and
XENFEAT_not_direct_mapped", Linux detects whether to enable or disable
swiotlb-xen based on the new feature flags: XENFEAT_direct_mapped and
XENFEAT_not_direct_mapped.

However, there is still a leftover xen_initial_domain() check in
xen_create_contiguous_region. Remove the check as
xen_create_contiguous_region is only called by swiotlb-xen during
initialization. If xen_create_contiguous_region is called, we know Linux
is running 1:1 mapped so there is no need for additional checks.

Also update the in-code comment.

Signed-off-by: Stefano Stabellini 
Signed-off-by: Christoph Hellwig 
---
 arch/arm/xen/mm.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index a7e54a087b802..28c2070602535 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -122,10 +122,7 @@ int xen_create_contiguous_region(phys_addr_t pstart, 
unsigned int order,
 unsigned int address_bits,
 dma_addr_t *dma_handle)
 {
-   if (!xen_initial_domain())
-   return -EINVAL;
-
-   /* we assume that dom0 is mapped 1:1 for now */
+   /* the domain is 1:1 mapped to use swiotlb-xen */
*dma_handle = pstart;
return 0;
 }
-- 
2.30.2



[PATCH 04/15] swiotlb: rename swiotlb_late_init_with_default_size

2022-03-14 Thread Christoph Hellwig
swiotlb_late_init_with_default_size is an overly verbose name that
doesn't even describe what the function is doing, given that the size
is not just a default but the actual requested size.

Rename it to swiotlb_init_late.

Signed-off-by: Christoph Hellwig 
Reviewed-by: Anshuman Khandual 
---
 arch/x86/pci/sta2x11-fixup.c | 2 +-
 include/linux/swiotlb.h  | 2 +-
 kernel/dma/swiotlb.c | 6 ++
 3 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index 101081ad64b6d..e0c039a75b2db 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -57,7 +57,7 @@ static void sta2x11_new_instance(struct pci_dev *pdev)
int size = STA2X11_SWIOTLB_SIZE;
/* First instance: register your own swiotlb area */
dev_info(&pdev->dev, "Using SWIOTLB (size %i)\n", size);
-   if (swiotlb_late_init_with_default_size(size))
+   if (swiotlb_init_late(size))
dev_emerg(&pdev->dev, "init swiotlb failed\n");
}
list_add(&instance->list, &sta2x11_instance_list);
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 9fb3a568f0c51..b48b26bfa0edb 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -40,7 +40,7 @@ extern void swiotlb_init(int verbose);
 int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose);
 unsigned long swiotlb_size_or_default(void);
 extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs);
-extern int swiotlb_late_init_with_default_size(size_t default_size);
+int swiotlb_init_late(size_t size);
 extern void __init swiotlb_update_mem_attributes(void);
 
 phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys,
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index b75943c2a0a0e..14e08fa9621c2 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -281,11 +281,9 @@ swiotlb_init(int verbose)
  * initialize the swiotlb later using the slab allocator if needed.
  * This should be just like above, but with some error catching.
  */
-int
-swiotlb_late_init_with_default_size(size_t default_size)
+int swiotlb_init_late(size_t size)
 {
-   unsigned long nslabs =
-   ALIGN(default_size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
+   unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
unsigned long bytes;
unsigned char *vstart = NULL;
unsigned int order;
-- 
2.30.2



[PATCH 02/15] swiotlb: make swiotlb_exit a no-op if SWIOTLB_FORCE is set

2022-03-14 Thread Christoph Hellwig
If force bouncing is enabled we can't release the buffers.

Signed-off-by: Christoph Hellwig 
Reviewed-by: Anshuman Khandual 
---
 kernel/dma/swiotlb.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 908eac2527cb1..af9d257501a64 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -369,6 +369,9 @@ void __init swiotlb_exit(void)
unsigned long tbl_vaddr;
size_t tbl_size, slots_size;
 
+   if (swiotlb_force == SWIOTLB_FORCE)
+   return;
+
if (!mem->nslabs)
return;
 
-- 
2.30.2



[PATCH 03/15] swiotlb: simplify swiotlb_max_segment

2022-03-14 Thread Christoph Hellwig
Remove the bogus Xen override that was usually larger than the actual
size and just calculate the value on demand.  Note that
swiotlb_max_segment still doesn't make sense as an interface and should
eventually be removed.
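
After this patch the value is simply derived on demand from the live
buffer size (the computation from the hunk below):

    return rounddown(io_tlb_default_mem.nslabs << IO_TLB_SHIFT, PAGE_SIZE);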

Signed-off-by: Christoph Hellwig 
Reviewed-by: Anshuman Khandual 
---
 drivers/xen/swiotlb-xen.c |  2 --
 include/linux/swiotlb.h   |  1 -
 kernel/dma/swiotlb.c  | 20 +++-
 3 files changed, 3 insertions(+), 20 deletions(-)

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 47aebd98f52f5..485cd06ed39e7 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -202,7 +202,6 @@ int xen_swiotlb_init(void)
rc = swiotlb_late_init_with_tbl(start, nslabs);
if (rc)
return rc;
-   swiotlb_set_max_segment(PAGE_SIZE);
return 0;
 error:
if (nslabs > 1024 && repeat--) {
@@ -254,7 +253,6 @@ void __init xen_swiotlb_init_early(void)
 
if (swiotlb_init_with_tbl(start, nslabs, true))
panic("Cannot allocate SWIOTLB buffer");
-   swiotlb_set_max_segment(PAGE_SIZE);
 }
 #endif /* CONFIG_X86 */
 
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index f6c3638255d54..9fb3a568f0c51 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -164,7 +164,6 @@ static inline void swiotlb_adjust_size(unsigned long size)
 #endif /* CONFIG_SWIOTLB */
 
 extern void swiotlb_print_info(void);
-extern void swiotlb_set_max_segment(unsigned int);
 
 #ifdef CONFIG_DMA_RESTRICTED_POOL
 struct page *swiotlb_alloc(struct device *dev, size_t size);
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index af9d257501a64..b75943c2a0a0e 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -68,12 +68,6 @@ struct io_tlb_mem io_tlb_default_mem;
 
 phys_addr_t swiotlb_unencrypted_base;
 
-/*
- * Max segment that we can provide which (if pages are contingous) will
- * not be bounced (unless SWIOTLB_FORCE is set).
- */
-static unsigned int max_segment;
-
 static unsigned long default_nslabs = IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT;
 
 static int __init
@@ -97,18 +91,12 @@ early_param("swiotlb", setup_io_tlb_npages);
 
 unsigned int swiotlb_max_segment(void)
 {
-   return io_tlb_default_mem.nslabs ? max_segment : 0;
+   if (!io_tlb_default_mem.nslabs)
+   return 0;
+   return rounddown(io_tlb_default_mem.nslabs << IO_TLB_SHIFT, PAGE_SIZE);
 }
 EXPORT_SYMBOL_GPL(swiotlb_max_segment);
 
-void swiotlb_set_max_segment(unsigned int val)
-{
-   if (swiotlb_force == SWIOTLB_FORCE)
-   max_segment = 1;
-   else
-   max_segment = rounddown(val, PAGE_SIZE);
-}
-
 unsigned long swiotlb_size_or_default(void)
 {
return default_nslabs << IO_TLB_SHIFT;
@@ -258,7 +246,6 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long 
nslabs, int verbose)
 
if (verbose)
swiotlb_print_info();
-   swiotlb_set_max_segment(mem->nslabs << IO_TLB_SHIFT);
return 0;
 }
 
@@ -359,7 +346,6 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
swiotlb_init_io_tlb_mem(mem, virt_to_phys(tlb), nslabs, true);
 
swiotlb_print_info();
-   swiotlb_set_max_segment(mem->nslabs << IO_TLB_SHIFT);
return 0;
 }
 
-- 
2.30.2



[PATCH 01/15] dma-direct: use is_swiotlb_active in dma_direct_map_page

2022-03-14 Thread Christoph Hellwig
Use the more specific is_swiotlb_active check instead of checking the
global swiotlb_force variable.

Signed-off-by: Christoph Hellwig 
Reviewed-by: Anshuman Khandual 
---
 kernel/dma/direct.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
index 4632b0f4f72eb..4dc16e08c7e1a 100644
--- a/kernel/dma/direct.h
+++ b/kernel/dma/direct.h
@@ -91,7 +91,7 @@ static inline dma_addr_t dma_direct_map_page(struct device 
*dev,
return swiotlb_map(dev, phys, size, dir, attrs);
 
if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
-   if (swiotlb_force != SWIOTLB_NO_FORCE)
+   if (is_swiotlb_active(dev))
return swiotlb_map(dev, phys, size, dir, attrs);
 
dev_WARN_ONCE(dev, 1,
-- 
2.30.2



cleanup swiotlb initialization v5

2022-03-14 Thread Christoph Hellwig
Hi all,

this series tries to clean up the swiotlb initialization, including
that of swiotlb-xen.  To get there it also removes the x86 iommu table
infrastructure that massively obfuscates the initialization path.

Git tree:

git://git.infradead.org/users/hch/misc.git swiotlb-init-cleanup

Gitweb:


http://git.infradead.org/users/hch/misc.git/shortlog/refs/heads/swiotlb-init-cleanup

Changes since v4:
 - split a patch into three
 - fix setting x86_swiotlb_enable for Xen
 - fix a comment about forced bounce buffering for guest memory
   encryption
 - remove the xen_initial_domain check from
   xen_create_contiguous_region

Changes since v3:
 - fix a compilation issue on some powerpc configs
 - fix and cleanup how forced bounce buffering is enabled for
   guest memory encryption

Changes since v2:
 - make ppc_swiotlb_flags actually work again
 - also force enable swiotlb for guest encrypted memory to cater
   to hyperv which doesn't set the host encrypted memory flag

Changes since v1:
 - skip IOMMU initialization on Xen PV kernels
 - various small whitespace / typo fixes

Diffstat:
 arch/ia64/include/asm/iommu_table.h  |7 -
 arch/x86/include/asm/iommu_table.h   |  102 --
 arch/x86/include/asm/swiotlb.h   |   30 -
 arch/x86/kernel/pci-iommu_table.c|   77 -
 arch/x86/kernel/pci-swiotlb.c|   77 -
 arch/x86/xen/pci-swiotlb-xen.c   |   96 -
 b/arch/arm/mm/init.c |6 -
 b/arch/arm/xen/mm.c  |   26 ++--
 b/arch/arm64/mm/init.c   |6 -
 b/arch/ia64/mm/init.c|4 
 b/arch/mips/cavium-octeon/dma-octeon.c   |   15 --
 b/arch/mips/loongson64/dma.c |2 
 b/arch/mips/pci/pci-octeon.c |2 
 b/arch/mips/sibyte/common/dma.c  |2 
 b/arch/powerpc/include/asm/svm.h |4 
 b/arch/powerpc/include/asm/swiotlb.h |1 
 b/arch/powerpc/kernel/dma-swiotlb.c  |1 
 b/arch/powerpc/mm/mem.c  |6 -
 b/arch/powerpc/platforms/pseries/setup.c |3 
 b/arch/powerpc/platforms/pseries/svm.c   |   26 
 b/arch/riscv/mm/init.c   |8 -
 b/arch/s390/mm/init.c|3 
 b/arch/x86/include/asm/dma-mapping.h |   12 --
 b/arch/x86/include/asm/gart.h|5 
 b/arch/x86/include/asm/iommu.h   |8 +
 b/arch/x86/include/asm/xen/page.h|5 
 b/arch/x86/include/asm/xen/swiotlb-xen.h |2 
 b/arch/x86/kernel/Makefile   |2 
 b/arch/x86/kernel/amd_gart_64.c  |5 
 b/arch/x86/kernel/aperture_64.c  |   14 --
 b/arch/x86/kernel/cpu/mshyperv.c |8 -
 b/arch/x86/kernel/pci-dma.c  |  113 
 b/arch/x86/kernel/tboot.c|1 
 b/arch/x86/kernel/vmlinux.lds.S  |   12 --
 b/arch/x86/mm/mem_encrypt_amd.c  |3 
 b/arch/x86/pci/sta2x11-fixup.c   |2 
 b/arch/x86/xen/Makefile  |2 
 b/drivers/iommu/amd/init.c   |6 -
 b/drivers/iommu/amd/iommu.c  |5 
 b/drivers/iommu/intel/dmar.c |6 -
 b/drivers/xen/swiotlb-xen.c  |  132 ---
 b/include/linux/dmar.h   |6 -
 b/include/linux/swiotlb.h|   22 +--
 b/include/trace/events/swiotlb.h |   29 +
 b/include/xen/arm/page.h |1 
 b/include/xen/swiotlb-xen.h  |8 +
 b/kernel/dma/direct.h|2 
 b/kernel/dma/swiotlb.c   |  175 +++
 48 files changed, 264 insertions(+), 826 deletions(-)


[PATCH] iommu/amd: Remove redundant check

2022-03-14 Thread Vasant Hegde via iommu
smatch static checker warning:
  drivers/iommu/amd/init.c:1989 amd_iommu_init_pci()
  warn: duplicate check 'ret' (previous on line 1978)

Reported-by: Dan Carpenter 
Fixes: 06687a03805e ("iommu/amd: Improve error handling for amd_iommu_init_pci")
Signed-off-by: Vasant Hegde 
---
 drivers/iommu/amd/init.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 2586e589e54e..8ed1f86fe93d 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -2137,8 +2137,7 @@ static int __init amd_iommu_init_pci(void)
for_each_iommu(iommu)
iommu_flush_all_caches(iommu);
 
-   if (!ret)
-   print_iommu_info();
+   print_iommu_info();
 
 out:
return ret;
-- 
2.27.0
