[PATCH v6 12/12] iommu: Rename iommu-sva-lib.{c,h}

2022-05-09 Thread Lu Baolu
Rename iommu-sva-lib.c[h] to iommu-sva.c[h] as they contain all the code
for the SVA implementation in the iommu core.

Signed-off-by: Lu Baolu 
Reviewed-by: Jean-Philippe Brucker 
---
 drivers/iommu/{iommu-sva-lib.h => iommu-sva.h}  | 0
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 2 +-
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 2 +-
 drivers/iommu/intel/iommu.c | 2 +-
 drivers/iommu/intel/svm.c   | 2 +-
 drivers/iommu/io-pgfault.c  | 2 +-
 drivers/iommu/{iommu-sva-lib.c => iommu-sva.c}  | 2 +-
 drivers/iommu/Makefile  | 2 +-
 8 files changed, 7 insertions(+), 7 deletions(-)
 rename drivers/iommu/{iommu-sva-lib.h => iommu-sva.h} (100%)
 rename drivers/iommu/{iommu-sva-lib.c => iommu-sva.c} (99%)

diff --git a/drivers/iommu/iommu-sva-lib.h b/drivers/iommu/iommu-sva.h
similarity index 100%
rename from drivers/iommu/iommu-sva-lib.h
rename to drivers/iommu/iommu-sva.h
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index 56644e553c42..265b125d7dc4 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -9,7 +9,7 @@
 #include 
 
 #include "arm-smmu-v3.h"
-#include "../../iommu-sva-lib.h"
+#include "../../iommu-sva.h"
 #include "../../io-pgtable-arm.h"
 
 struct arm_smmu_mmu_notifier {
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 6a10fa181827..de3b6fbf8766 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -31,7 +31,7 @@
 #include 
 
 #include "arm-smmu-v3.h"
-#include "../../iommu-sva-lib.h"
+#include "../../iommu-sva.h"
 
 static bool disable_bypass = true;
 module_param(disable_bypass, bool, 0444);
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index a5728f743c6d..1c2c92b657c7 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -27,7 +27,7 @@
 #include 
 
 #include "../irq_remapping.h"
-#include "../iommu-sva-lib.h"
+#include "../iommu-sva.h"
 #include "pasid.h"
 #include "cap_audit.h"
 
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index ca83ebc708a8..44331db060e4 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -25,7 +25,7 @@
 
 #include "pasid.h"
 #include "perf.h"
-#include "../iommu-sva-lib.h"
+#include "../iommu-sva.h"
 
 static irqreturn_t prq_event_thread(int irq, void *d);
 static void intel_svm_drain_prq(struct device *dev, u32 pasid);
diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
index 9efe5259402b..2a8604013b7e 100644
--- a/drivers/iommu/io-pgfault.c
+++ b/drivers/iommu/io-pgfault.c
@@ -11,7 +11,7 @@
 #include 
 #include 
 
-#include "iommu-sva-lib.h"
+#include "iommu-sva.h"
 
 /**
  * struct iopf_queue - IO Page Fault queue
diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva.c
similarity index 99%
rename from drivers/iommu/iommu-sva-lib.c
rename to drivers/iommu/iommu-sva.c
index ea12504a9e12..1791ac1e3d34 100644
--- a/drivers/iommu/iommu-sva-lib.c
+++ b/drivers/iommu/iommu-sva.c
@@ -7,7 +7,7 @@
 #include 
 #include 
 
-#include "iommu-sva-lib.h"
+#include "iommu-sva.h"
 
 static DEFINE_MUTEX(iommu_sva_lock);
 static DECLARE_IOASID_SET(iommu_sva_pasid);
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 44475a9b3eea..c1763476162b 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -27,6 +27,6 @@ obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
 obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
 obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o
 obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
-obj-$(CONFIG_IOMMU_SVA) += iommu-sva-lib.o io-pgfault.o
+obj-$(CONFIG_IOMMU_SVA) += iommu-sva.o io-pgfault.o
 obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o
 obj-$(CONFIG_APPLE_DART) += apple-dart.o
-- 
2.25.1

[PATCH v6 11/12] iommu: Per-domain I/O page fault handling

2022-05-09 Thread Lu Baolu
Tweak the I/O page fault handling framework to route page faults to the
domain and call the page fault handler retrieved from the domain. This
makes it possible for the I/O page fault handling framework to serve
more usage scenarios, as long as they have an IOMMU domain and install
a page fault handler in it. Some unused functions are also removed to
avoid dead code.
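
As an illustration, a consumer that owns an iommu_domain would install
its handler roughly as below. This is a hedged sketch rather than code
from the patch; my_iopf_handler, my_ctx and my_domain_setup are
hypothetical names, and the handler signature matches the iopf_handler
member added in the previous patch.

/*
 * Hedged sketch: a hypothetical consumer installing a per-domain
 * fault handler. my_iopf_handler and my_ctx are made-up names.
 */
static enum iommu_page_response_code
my_iopf_handler(struct iommu_fault *fault, void *data)
{
	struct my_ctx *ctx = data;

	/* resolve fault->prm.addr for fault->prm.pasid using ctx */
	return IOMMU_PAGE_RESP_SUCCESS;
}

static void my_domain_setup(struct iommu_domain *domain, struct my_ctx *ctx)
{
	domain->iopf_handler = my_iopf_handler;
	domain->fault_data = ctx;
}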

Signed-off-by: Lu Baolu 
Reviewed-by: Jean-Philippe Brucker 
---
 drivers/iommu/iommu-sva-lib.h |  2 --
 drivers/iommu/io-pgfault.c| 64 ---
 drivers/iommu/iommu-sva-lib.c | 20 ---
 3 files changed, 7 insertions(+), 79 deletions(-)

diff --git a/drivers/iommu/iommu-sva-lib.h b/drivers/iommu/iommu-sva-lib.h
index 3420654c6e2f..74ce2e76321b 100644
--- a/drivers/iommu/iommu-sva-lib.h
+++ b/drivers/iommu/iommu-sva-lib.h
@@ -8,8 +8,6 @@
 #include 
 #include 
 
-struct mm_struct *iommu_sva_find(ioasid_t pasid);
-
 /* I/O Page fault */
 struct device;
 struct iommu_fault;
diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
index aee9e033012f..9efe5259402b 100644
--- a/drivers/iommu/io-pgfault.c
+++ b/drivers/iommu/io-pgfault.c
@@ -69,69 +69,18 @@ static int iopf_complete_group(struct device *dev, struct 
iopf_fault *iopf,
return iommu_page_response(dev, &resp);
 }
 
-static enum iommu_page_response_code
-iopf_handle_single(struct iopf_fault *iopf)
-{
-   vm_fault_t ret;
-   struct mm_struct *mm;
-   struct vm_area_struct *vma;
-   unsigned int access_flags = 0;
-   unsigned int fault_flags = FAULT_FLAG_REMOTE;
-   struct iommu_fault_page_request *prm = &iopf->fault.prm;
-   enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID;
-
-   if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID))
-   return status;
-
-   mm = iommu_sva_find(prm->pasid);
-   if (IS_ERR_OR_NULL(mm))
-   return status;
-
-   mmap_read_lock(mm);
-
-   vma = find_extend_vma(mm, prm->addr);
-   if (!vma)
-   /* Unmapped area */
-   goto out_put_mm;
-
-   if (prm->perm & IOMMU_FAULT_PERM_READ)
-   access_flags |= VM_READ;
-
-   if (prm->perm & IOMMU_FAULT_PERM_WRITE) {
-   access_flags |= VM_WRITE;
-   fault_flags |= FAULT_FLAG_WRITE;
-   }
-
-   if (prm->perm & IOMMU_FAULT_PERM_EXEC) {
-   access_flags |= VM_EXEC;
-   fault_flags |= FAULT_FLAG_INSTRUCTION;
-   }
-
-   if (!(prm->perm & IOMMU_FAULT_PERM_PRIV))
-   fault_flags |= FAULT_FLAG_USER;
-
-   if (access_flags & ~vma->vm_flags)
-   /* Access fault */
-   goto out_put_mm;
-
-   ret = handle_mm_fault(vma, prm->addr, fault_flags, NULL);
-   status = ret & VM_FAULT_ERROR ? IOMMU_PAGE_RESP_INVALID :
-   IOMMU_PAGE_RESP_SUCCESS;
-
-out_put_mm:
-   mmap_read_unlock(mm);
-   mmput(mm);
-
-   return status;
-}
-
 static void iopf_handle_group(struct work_struct *work)
 {
struct iopf_group *group;
+   struct iommu_domain *domain;
struct iopf_fault *iopf, *next;
enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS;
 
group = container_of(work, struct iopf_group, work);
+   domain = iommu_get_domain_for_iopf(group->dev,
+  group->last_fault.fault.prm.pasid);
+   if (!domain || !domain->iopf_handler)
+   status = IOMMU_PAGE_RESP_INVALID;
 
list_for_each_entry_safe(iopf, next, &group->faults, list) {
/*
@@ -139,7 +88,8 @@ static void iopf_handle_group(struct work_struct *work)
 * faults in the group if there is an error.
 */
if (status == IOMMU_PAGE_RESP_SUCCESS)
-   status = iopf_handle_single(iopf);
+   status = domain->iopf_handler(&iopf->fault,
+ domain->fault_data);
 
if (!(iopf->fault.prm.flags &
  IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva-lib.c
index 32c836e4a60e..ea12504a9e12 100644
--- a/drivers/iommu/iommu-sva-lib.c
+++ b/drivers/iommu/iommu-sva-lib.c
@@ -52,26 +52,6 @@ static int iommu_sva_alloc_pasid(struct mm_struct *mm,
return ret;
 }
 
-/* ioasid_find getter() requires a void * argument */
-static bool __mmget_not_zero(void *mm)
-{
-   return mmget_not_zero(mm);
-}
-
-/**
- * iommu_sva_find() - Find mm associated to the given PASID
- * @pasid: Process Address Space ID assigned to the mm
- *
- * On success a reference to the mm is taken, and must be released with 
mmput().
- *
- * Returns the mm corresponding to this PASID, or an error if not found.
- */
-struct mm_struct *iommu_sva_find(ioasid_t pasid)
-{
-   return ioasid_find(&iommu_sva_pasid, pasid, __mmget_not_zero);
-}
-EXPORT_SYMBOL_GPL(iommu_sva_find);
-
 /*
  *

[PATCH v6 10/12] iommu: Prepare IOMMU domain for IOPF

2022-05-09 Thread Lu Baolu
This adds some mechanisms around the iommu_domain so that the I/O page
fault handling framework can route a page fault to the domain and call
the fault handler installed there.

Add pointers to the page fault handler and its private data in struct
iommu_domain. The fault handler will be called with the private data as
a parameter once a page fault is routed to the domain. Any kernel
component which owns an iommu domain can install a handler and its
private parameter so that the page fault can be further routed and
handled.

Add a new helper, iommu_get_domain_for_iopf(), which retrieves the
attached domain for a {device, PASID} pair. It will be used by the page
fault handling framework, which knows the {device, PASID} reported by
the iommu driver. The SVA domain is guaranteed not to go away during
IOPF handling because unbind() waits for pending faults with
iopf_queue_flush_dev() before freeing the domain. Hence, there's no
need to synchronize the life cycle of the iommu domains between the
unbind() and the interrupt threads.

This also prepares the SVA implementation to be the first consumer of
the per-domain page fault handling model.
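
The lifetime rule can be summarized as the teardown ordering sketched
below. This is illustrative only; the exact call sites live in the SVA
unbind path rather than in one function.

/* hedged sketch of the ordering the framework relies on */
iommu_detach_device_pasid(domain, dev, pasid);  /* stop new faults */
iopf_queue_flush_dev(dev);                      /* drain pending faults */
iommu_domain_free(domain);                      /* now safe to free */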

Suggested-by: Jean-Philippe Brucker 
Signed-off-by: Lu Baolu 
---
 include/linux/iommu.h | 12 +++
 drivers/iommu/io-pgfault.c|  7 
 drivers/iommu/iommu-sva-lib.c | 65 +++
 drivers/iommu/iommu.c | 27 +++
 4 files changed, 111 insertions(+)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 392b8adc3495..9405034e3013 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -103,6 +103,9 @@ struct iommu_domain {
 #ifdef CONFIG_IOMMU_SVA
struct mm_struct *mm;
 #endif
+   enum iommu_page_response_code (*iopf_handler)(struct iommu_fault *fault,
+ void *data);
+   void *fault_data;
 };
 
 static inline bool iommu_is_dma_domain(struct iommu_domain *domain)
@@ -687,6 +690,9 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
  struct device *dev, ioasid_t pasid);
 void iommu_detach_device_pasid(struct iommu_domain *domain,
   struct device *dev, ioasid_t pasid);
+struct iommu_domain *
+iommu_get_domain_for_iopf(struct device *dev, ioasid_t pasid);
+
 #else /* CONFIG_IOMMU_API */
 
 struct iommu_ops {};
@@ -1056,6 +1062,12 @@ static inline void iommu_detach_device_pasid(struct 
iommu_domain *domain,
 struct device *dev, ioasid_t pasid)
 {
 }
+
+static inline struct iommu_domain *
+iommu_get_domain_for_iopf(struct device *dev, ioasid_t pasid)
+{
+   return NULL;
+}
 #endif /* CONFIG_IOMMU_API */
 
 #ifdef CONFIG_IOMMU_SVA
diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
index 1df8c1dcae77..aee9e033012f 100644
--- a/drivers/iommu/io-pgfault.c
+++ b/drivers/iommu/io-pgfault.c
@@ -181,6 +181,13 @@ static void iopf_handle_group(struct work_struct *work)
  * request completes, outstanding faults will have been dealt with by the time
  * the PASID is freed.
  *
+ * Any valid page fault will be eventually routed to an iommu domain and the
+ * page fault handler installed there will get called. The users of this
+ * handling framework should guarantee that the iommu domain could only be
+ * freed after the device has stopped generating page faults (or the iommu
+ * hardware has been set to block the page faults) and the pending page faults
+ * have been flushed.
+ *
  * Return: 0 on success and <0 on error.
  */
 int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)
diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva-lib.c
index ef6ed87d04ba..32c836e4a60e 100644
--- a/drivers/iommu/iommu-sva-lib.c
+++ b/drivers/iommu/iommu-sva-lib.c
@@ -72,6 +72,69 @@ struct mm_struct *iommu_sva_find(ioasid_t pasid)
 }
 EXPORT_SYMBOL_GPL(iommu_sva_find);
 
+/*
+ * I/O page fault handler for SVA
+ *
+ * Copied from io-pgfault.c with mmget_not_zero() added before
+ * mmap_read_lock().
+ */
+static enum iommu_page_response_code
+iommu_sva_handle_iopf(struct iommu_fault *fault, void *data)
+{
+   vm_fault_t ret;
+   struct mm_struct *mm;
+   struct vm_area_struct *vma;
+   unsigned int access_flags = 0;
+   struct iommu_domain *domain = data;
+   unsigned int fault_flags = FAULT_FLAG_REMOTE;
+   struct iommu_fault_page_request *prm = &fault->prm;
+   enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID;
+
+   if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID))
+   return status;
+
+   mm = domain->mm;
+   if (IS_ERR_OR_NULL(mm) || !mmget_not_zero(mm))
+   return status;
+
+   mmap_read_lock(mm);
+
+   vma = find_extend_vma(mm, prm->addr);
+   if (!vma)
+   /* Unmapped area */
+   goto out_put_mm;
+
+   if (prm->perm & IOMMU_FAULT_PERM_READ)
+   access_flags |= VM_READ;

[PATCH v6 09/12] iommu: Remove SVA related callbacks from iommu ops

2022-05-09 Thread Lu Baolu
These ops have been replaced with the attach/detach_dev_pasid domain
ops. There's no need for them anymore; remove them to avoid dead code.

Signed-off-by: Lu Baolu 
Reviewed-by: Jean-Philippe Brucker 
---
 include/linux/intel-iommu.h   |  4 --
 include/linux/iommu.h |  8 ---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   | 17 ---
 drivers/iommu/iommu-sva-lib.h |  1 -
 .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c   | 41 
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   |  3 --
 drivers/iommu/intel/iommu.c   |  3 --
 drivers/iommu/intel/svm.c | 49 ---
 drivers/iommu/iommu-sva-lib.c |  4 +-
 9 files changed, 2 insertions(+), 128 deletions(-)

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 2397c2007cda..4a5cc796f917 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -738,10 +738,6 @@ struct intel_iommu *device_to_iommu(struct device *dev, u8 
*bus, u8 *devfn);
 extern void intel_svm_check(struct intel_iommu *iommu);
 extern int intel_svm_enable_prq(struct intel_iommu *iommu);
 extern int intel_svm_finish_prq(struct intel_iommu *iommu);
-struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm,
-void *drvdata);
-void intel_svm_unbind(struct iommu_sva *handle);
-u32 intel_svm_get_pasid(struct iommu_sva *handle);
 int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt,
struct iommu_page_response *msg);
 struct iommu_domain *intel_svm_domain_alloc(void);
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 5a3ef4d58b1f..392b8adc3495 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -215,9 +215,6 @@ struct iommu_iotlb_gather {
  * @dev_has/enable/disable_feat: per device entries to check/enable/disable
  *   iommu specific features.
  * @dev_feat_enabled: check enabled feature
- * @sva_bind: Bind process address space to device
- * @sva_unbind: Unbind process address space from device
- * @sva_get_pasid: Get PASID associated to a SVA handle
  * @page_response: handle page request response
  * @def_domain_type: device default domain type, return value:
  * - IOMMU_DOMAIN_IDENTITY: must use an identity domain
@@ -251,11 +248,6 @@ struct iommu_ops {
int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f);
int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f);
 
-   struct iommu_sva *(*sva_bind)(struct device *dev, struct mm_struct *mm,
- void *drvdata);
-   void (*sva_unbind)(struct iommu_sva *handle);
-   u32 (*sva_get_pasid)(struct iommu_sva *handle);
-
int (*page_response)(struct device *dev,
 struct iommu_fault_event *evt,
 struct iommu_page_response *msg);
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index e077f21e2528..15dd4c7e6d3a 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -754,10 +754,6 @@ bool arm_smmu_master_sva_enabled(struct arm_smmu_master 
*master);
 int arm_smmu_master_enable_sva(struct arm_smmu_master *master);
 int arm_smmu_master_disable_sva(struct arm_smmu_master *master);
 bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master);
-struct iommu_sva *arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm,
-   void *drvdata);
-void arm_smmu_sva_unbind(struct iommu_sva *handle);
-u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle);
 void arm_smmu_sva_notifier_synchronize(void);
 struct iommu_domain *arm_smmu_sva_domain_alloc(void);
 #else /* CONFIG_ARM_SMMU_V3_SVA */
@@ -791,19 +787,6 @@ static inline bool arm_smmu_master_iopf_supported(struct 
arm_smmu_master *master
return false;
 }
 
-static inline struct iommu_sva *
-arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
-{
-   return ERR_PTR(-ENODEV);
-}
-
-static inline void arm_smmu_sva_unbind(struct iommu_sva *handle) {}
-
-static inline u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle)
-{
-   return IOMMU_PASID_INVALID;
-}
-
 static inline void arm_smmu_sva_notifier_synchronize(void) {}
 
 static inline struct iommu_domain *arm_smmu_sva_domain_alloc(void)
diff --git a/drivers/iommu/iommu-sva-lib.h b/drivers/iommu/iommu-sva-lib.h
index 8909ea1094e3..3420654c6e2f 100644
--- a/drivers/iommu/iommu-sva-lib.h
+++ b/drivers/iommu/iommu-sva-lib.h
@@ -8,7 +8,6 @@
 #include 
 #include 
 
-int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max);
 struct mm_struct *iommu_sva_find(ioasid_t pasid);
 
 /* I/O Page fault */
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c 
b/drivers/iommu/arm/arm-

[PATCH v6 08/12] iommu/sva: Use attach/detach_pasid_dev in SVA interfaces

2022-05-09 Thread Lu Baolu
The existing iommu SVA interfaces are implemented by calling the
SVA-specific iommu ops provided by the IOMMU drivers. There's no need
for any SVA-specific ops in the iommu_ops vector anymore, as we can
achieve this through the generic attach/detach_dev_pasid domain ops.

This refactors the IOMMU SVA interface implementation to use the
attach/detach_dev_pasid ops and aligns it with the concept of the
iommu domain. Put the new SVA code in the SVA-related file in order
to make it self-contained.
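
The driver-facing calling convention is unchanged. A minimal usage
sketch, mirroring how the idxd driver uses these interfaces elsewhere
in this series:

struct iommu_sva *handle;
u32 pasid;

handle = iommu_sva_bind_device(dev, current->mm, NULL);
if (IS_ERR(handle))
	return PTR_ERR(handle);
pasid = iommu_sva_get_pasid(handle);
/* program the device to tag its DMA with this pasid ... */
iommu_sva_unbind_device(handle);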

Signed-off-by: Lu Baolu 
---
 include/linux/iommu.h |  44 ++-
 drivers/iommu/iommu-sva-lib.c | 145 ++
 drivers/iommu/iommu.c |  92 -
 3 files changed, 168 insertions(+), 113 deletions(-)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 2921e634491e..5a3ef4d58b1f 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -684,12 +684,6 @@ int iommu_dev_enable_feature(struct device *dev, enum 
iommu_dev_features f);
 int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f);
 bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features f);
 
-struct iommu_sva *iommu_sva_bind_device(struct device *dev,
-   struct mm_struct *mm,
-   void *drvdata);
-void iommu_sva_unbind_device(struct iommu_sva *handle);
-u32 iommu_sva_get_pasid(struct iommu_sva *handle);
-
 int iommu_device_use_default_domain(struct device *dev);
 void iommu_device_unuse_default_domain(struct device *dev);
 
@@ -1031,21 +1025,6 @@ iommu_dev_disable_feature(struct device *dev, enum 
iommu_dev_features feat)
return -ENODEV;
 }
 
-static inline struct iommu_sva *
-iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
-{
-   return NULL;
-}
-
-static inline void iommu_sva_unbind_device(struct iommu_sva *handle)
-{
-}
-
-static inline u32 iommu_sva_get_pasid(struct iommu_sva *handle)
-{
-   return IOMMU_PASID_INVALID;
-}
-
 static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev)
 {
return NULL;
@@ -1087,6 +1066,29 @@ static inline void iommu_detach_device_pasid(struct 
iommu_domain *domain,
 }
 #endif /* CONFIG_IOMMU_API */
 
+#ifdef CONFIG_IOMMU_SVA
+struct iommu_sva *iommu_sva_bind_device(struct device *dev,
+   struct mm_struct *mm,
+   void *drvdata);
+void iommu_sva_unbind_device(struct iommu_sva *handle);
+u32 iommu_sva_get_pasid(struct iommu_sva *handle);
+#else /* CONFIG_IOMMU_SVA */
+static inline struct iommu_sva *
+iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
+{
+   return NULL;
+}
+
+static inline void iommu_sva_unbind_device(struct iommu_sva *handle)
+{
+}
+
+static inline u32 iommu_sva_get_pasid(struct iommu_sva *handle)
+{
+   return IOMMU_PASID_INVALID;
+}
+#endif /* CONFIG_IOMMU_SVA */
+
 /**
  * iommu_map_sgtable - Map the given buffer to the IOMMU domain
  * @domain:The IOMMU domain to perform the mapping
diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva-lib.c
index 106506143896..e7301514f286 100644
--- a/drivers/iommu/iommu-sva-lib.c
+++ b/drivers/iommu/iommu-sva-lib.c
@@ -3,6 +3,8 @@
  * Helpers for IOMMU drivers implementing SVA
  */
 #include 
+#include 
+#include 
 #include 
 
 #include "iommu-sva-lib.h"
@@ -69,3 +71,146 @@ struct mm_struct *iommu_sva_find(ioasid_t pasid)
return ioasid_find(&iommu_sva_pasid, pasid, __mmget_not_zero);
 }
 EXPORT_SYMBOL_GPL(iommu_sva_find);
+
+/*
+ * IOMMU SVA driver-oriented interfaces
+ */
+static struct iommu_domain *
+iommu_sva_alloc_domain(struct device *dev, struct mm_struct *mm)
+{
+   struct bus_type *bus = dev->bus;
+   struct iommu_domain *domain;
+
+   if (!bus || !bus->iommu_ops)
+   return NULL;
+
+   domain = bus->iommu_ops->domain_alloc(IOMMU_DOMAIN_SVA);
+   if (!domain)
+   return NULL;
+
+   mmgrab(mm);
+   domain->mm = mm;
+   domain->type = IOMMU_DOMAIN_SVA;
+
+   return domain;
+}
+
+static void iommu_sva_free_domain(struct iommu_domain *domain)
+{
+   mmdrop(domain->mm);
+   iommu_domain_free(domain);
+}
+
+/**
+ * iommu_sva_bind_device() - Bind a process address space to a device
+ * @dev: the device
+ * @mm: the mm to bind, caller must hold a reference to mm_users
+ * @drvdata: opaque data pointer to pass to bind callback
+ *
+ * Create a bond between device and address space, allowing the device to 
access
+ * the mm using the returned PASID. If a bond already exists between @device 
and
+ * @mm, it is returned and an additional reference is taken. Caller must call
+ * iommu_sva_unbind_device() to release each reference.
+ *
+ * iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA) must be called first, to
+ * initialize the required SVA features.
+ *
+ * On error, returns an ERR_PTR value.
+ */
+struct i

[PATCH v6 07/12] arm-smmu-v3/sva: Add SVA domain support

2022-05-09 Thread Lu Baolu
Add support for SVA domain allocation and provide an SVA-specific
iommu_domain_ops.

Signed-off-by: Lu Baolu 
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   |  6 ++
 .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c   | 68 +++
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   |  3 +
 3 files changed, 77 insertions(+)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index cd48590ada30..e077f21e2528 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -759,6 +759,7 @@ struct iommu_sva *arm_smmu_sva_bind(struct device *dev, 
struct mm_struct *mm,
 void arm_smmu_sva_unbind(struct iommu_sva *handle);
 u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle);
 void arm_smmu_sva_notifier_synchronize(void);
+struct iommu_domain *arm_smmu_sva_domain_alloc(void);
 #else /* CONFIG_ARM_SMMU_V3_SVA */
 static inline bool arm_smmu_sva_supported(struct arm_smmu_device *smmu)
 {
@@ -804,5 +805,10 @@ static inline u32 arm_smmu_sva_get_pasid(struct iommu_sva 
*handle)
 }
 
 static inline void arm_smmu_sva_notifier_synchronize(void) {}
+
+static inline struct iommu_domain *arm_smmu_sva_domain_alloc(void)
+{
+   return NULL;
+}
 #endif /* CONFIG_ARM_SMMU_V3_SVA */
 #endif /* _ARM_SMMU_V3_H */
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index c623dae1e115..9d176e836d6b 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -541,3 +541,71 @@ void arm_smmu_sva_notifier_synchronize(void)
 */
mmu_notifier_synchronize();
 }
+
+static int arm_smmu_sva_attach_dev_pasid(struct iommu_domain *domain,
+struct device *dev, ioasid_t id)
+{
+   int ret = 0;
+   struct mm_struct *mm;
+   struct iommu_sva *handle;
+
+   if (domain->type != IOMMU_DOMAIN_SVA)
+   return -EINVAL;
+
+   mm = domain->mm;
+   if (WARN_ON(!mm))
+   return -ENODEV;
+
+   mutex_lock(&sva_lock);
+   handle = __arm_smmu_sva_bind(dev, mm);
+   if (IS_ERR(handle))
+   ret = PTR_ERR(handle);
+   mutex_unlock(&sva_lock);
+
+   return ret;
+}
+
+static void arm_smmu_sva_detach_dev_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t id)
+{
+   struct mm_struct *mm = domain->mm;
+   struct arm_smmu_bond *bond = NULL, *t;
+   struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+
+   mutex_lock(&sva_lock);
+   list_for_each_entry(t, &master->bonds, list) {
+   if (t->mm == mm) {
+   bond = t;
+   break;
+   }
+   }
+
+   if (!WARN_ON(!bond) && refcount_dec_and_test(&bond->refs)) {
+   list_del(&bond->list);
+   arm_smmu_mmu_notifier_put(bond->smmu_mn);
+   kfree(bond);
+   }
+   mutex_unlock(&sva_lock);
+}
+
+static void arm_smmu_sva_domain_free(struct iommu_domain *domain)
+{
+   kfree(domain);
+}
+
+static const struct iommu_domain_ops arm_smmu_sva_domain_ops = {
+   .attach_dev_pasid   = arm_smmu_sva_attach_dev_pasid,
+   .detach_dev_pasid   = arm_smmu_sva_detach_dev_pasid,
+   .free   = arm_smmu_sva_domain_free,
+};
+
+struct iommu_domain *arm_smmu_sva_domain_alloc(void)
+{
+   struct iommu_domain *domain;
+
+   domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+   if (domain)
+   domain->ops = &arm_smmu_sva_domain_ops;
+
+   return domain;
+}
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index afc63fce6107..9daf3de7e539 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1999,6 +1999,9 @@ static struct iommu_domain 
*arm_smmu_domain_alloc(unsigned type)
 {
struct arm_smmu_domain *smmu_domain;
 
+   if (type == IOMMU_DOMAIN_SVA)
+   return arm_smmu_sva_domain_alloc();
+
if (type != IOMMU_DOMAIN_UNMANAGED &&
type != IOMMU_DOMAIN_DMA &&
type != IOMMU_DOMAIN_DMA_FQ &&
-- 
2.25.1

[PATCH v6 05/12] iommu/vt-d: Remove SVM_FLAG_SUPERVISOR_MODE support

2022-05-09 Thread Lu Baolu
The current kernel DMA-with-PASID support is based on SVA with the flag
SVM_FLAG_SUPERVISOR_MODE. The IOMMU driver binds the kernel memory
address space to a PASID of the device, and the device driver programs
the device with kernel virtual addresses (KVA) for DMA access. There
have been security and functional issues with this approach:

- The lack of IOTLB synchronization upon kernel page table updates
  (vmalloc, module/BPF loading, CONFIG_DEBUG_PAGEALLOC, etc.).
- Other than slightly more protection, using kernel virtual addresses
  (KVA) has little advantage over physical addresses. There are also no
  use cases yet where DMA engines need kernel virtual addresses for
  in-kernel DMA.

This removes SVM_FLAG_SUPERVISOR_MODE support from the Intel IOMMU
driver. Device drivers should instead handle kernel DMA with PASID
through the kernel DMA APIs.
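
As a hedged sketch (buf and size are hypothetical), a driver that
previously DMA'd to a kernel buffer via a supervisor PASID would
instead map the buffer through the DMA API:

/* kernel DMA via the DMA API instead of a supervisor PASID */
dma_addr_t dma = dma_map_single(dev, buf, size, DMA_TO_DEVICE);

if (dma_mapping_error(dev, dma))
	return -ENOMEM;
/* ... hand 'dma' to the device, wait for completion ... */
dma_unmap_single(dev, dma, size, DMA_TO_DEVICE);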

Link: https://lore.kernel.org/linux-iommu/20210511194726.gp1002...@nvidia.com/
Signed-off-by: Jacob Pan 
Signed-off-by: Lu Baolu 
---
 drivers/iommu/intel/svm.c | 53 +--
 1 file changed, 12 insertions(+), 41 deletions(-)

diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 7ee37d996e15..574aa33a 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -313,8 +313,7 @@ static int pasid_to_svm_sdev(struct device *dev, unsigned 
int pasid,
return 0;
 }
 
-static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm,
-unsigned int flags)
+static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm)
 {
ioasid_t max_pasid = dev_is_pci(dev) ?
pci_max_pasids(to_pci_dev(dev)) : intel_pasid_max_id;
@@ -324,8 +323,7 @@ static int intel_svm_alloc_pasid(struct device *dev, struct 
mm_struct *mm,
 
 static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
   struct device *dev,
-  struct mm_struct *mm,
-  unsigned int flags)
+  struct mm_struct *mm)
 {
struct device_domain_info *info = dev_iommu_priv_get(dev);
unsigned long iflags, sflags;
@@ -341,22 +339,18 @@ static struct iommu_sva *intel_svm_bind_mm(struct 
intel_iommu *iommu,
 
svm->pasid = mm->pasid;
svm->mm = mm;
-   svm->flags = flags;
INIT_LIST_HEAD_RCU(&svm->devs);
 
-   if (!(flags & SVM_FLAG_SUPERVISOR_MODE)) {
-   svm->notifier.ops = &intel_mmuops;
-   ret = mmu_notifier_register(&svm->notifier, mm);
-   if (ret) {
-   kfree(svm);
-   return ERR_PTR(ret);
-   }
+   svm->notifier.ops = &intel_mmuops;
+   ret = mmu_notifier_register(&svm->notifier, mm);
+   if (ret) {
+   kfree(svm);
+   return ERR_PTR(ret);
}
 
ret = pasid_private_add(svm->pasid, svm);
if (ret) {
-   if (svm->notifier.ops)
-   mmu_notifier_unregister(&svm->notifier, mm);
+   mmu_notifier_unregister(&svm->notifier, mm);
kfree(svm);
return ERR_PTR(ret);
}
@@ -391,9 +385,7 @@ static struct iommu_sva *intel_svm_bind_mm(struct 
intel_iommu *iommu,
}
 
/* Setup the pasid table: */
-   sflags = (flags & SVM_FLAG_SUPERVISOR_MODE) ?
-   PASID_FLAG_SUPERVISOR_MODE : 0;
-   sflags |= cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
+   sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
spin_lock_irqsave(&iommu->lock, iflags);
ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid,
FLPT_DEFAULT_DID, sflags);
@@ -410,8 +402,7 @@ static struct iommu_sva *intel_svm_bind_mm(struct 
intel_iommu *iommu,
kfree(sdev);
 free_svm:
if (list_empty(&svm->devs)) {
-   if (svm->notifier.ops)
-   mmu_notifier_unregister(&svm->notifier, mm);
+   mmu_notifier_unregister(&svm->notifier, mm);
pasid_private_remove(mm->pasid);
kfree(svm);
}
@@ -821,37 +812,17 @@ static irqreturn_t prq_event_thread(int irq, void *d)
 struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, 
void *drvdata)
 {
struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
-   unsigned int flags = 0;
struct iommu_sva *sva;
int ret;
 
-   if (drvdata)
-   flags = *(unsigned int *)drvdata;
-
-   if (flags & SVM_FLAG_SUPERVISOR_MODE) {
-   if (!ecap_srs(iommu->ecap)) {
-   

[PATCH v6 06/12] iommu/vt-d: Add SVA domain support

2022-05-09 Thread Lu Baolu
Add support for SVA domain allocation and provide an SVA-specific
iommu_domain_ops.

Signed-off-by: Lu Baolu 
---
 include/linux/intel-iommu.h |  5 
 drivers/iommu/intel/iommu.c |  2 ++
 drivers/iommu/intel/svm.c   | 48 +
 3 files changed, 55 insertions(+)

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 72e5d7900e71..2397c2007cda 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -744,6 +744,7 @@ void intel_svm_unbind(struct iommu_sva *handle);
 u32 intel_svm_get_pasid(struct iommu_sva *handle);
 int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt,
struct iommu_page_response *msg);
+struct iommu_domain *intel_svm_domain_alloc(void);
 
 struct intel_svm_dev {
struct list_head list;
@@ -769,6 +770,10 @@ struct intel_svm {
 };
 #else
 static inline void intel_svm_check(struct intel_iommu *iommu) {}
+static inline struct iommu_domain *intel_svm_domain_alloc(void)
+{
+   return NULL;
+}
 #endif
 
 #ifdef CONFIG_INTEL_IOMMU_DEBUGFS
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 99643f897f26..10b1e9dcbd98 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4343,6 +4343,8 @@ static struct iommu_domain 
*intel_iommu_domain_alloc(unsigned type)
return domain;
case IOMMU_DOMAIN_IDENTITY:
return &si_domain->domain;
+   case IOMMU_DOMAIN_SVA:
+   return intel_svm_domain_alloc();
default:
return NULL;
}
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 574aa33a..641ab0491ada 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -931,3 +931,51 @@ int intel_svm_page_response(struct device *dev,
mutex_unlock(&pasid_mutex);
return ret;
 }
+
+static int intel_svm_attach_dev_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid)
+{
+   struct device_domain_info *info = dev_iommu_priv_get(dev);
+   struct intel_iommu *iommu = info->iommu;
+   struct mm_struct *mm = domain->mm;
+   struct iommu_sva *sva;
+   int ret = 0;
+
+   mutex_lock(&pasid_mutex);
+   sva = intel_svm_bind_mm(iommu, dev, mm);
+   if (IS_ERR(sva))
+   ret = PTR_ERR(sva);
+   mutex_unlock(&pasid_mutex);
+
+   return ret;
+}
+
+static void intel_svm_detach_dev_pasid(struct iommu_domain *domain,
+  struct device *dev, ioasid_t pasid)
+{
+   mutex_lock(&pasid_mutex);
+   intel_svm_unbind_mm(dev, pasid);
+   mutex_unlock(&pasid_mutex);
+}
+
+static void intel_svm_domain_free(struct iommu_domain *domain)
+{
+   kfree(domain);
+}
+
+static const struct iommu_domain_ops intel_svm_domain_ops = {
+   .attach_dev_pasid   = intel_svm_attach_dev_pasid,
+   .detach_dev_pasid   = intel_svm_detach_dev_pasid,
+   .free   = intel_svm_domain_free,
+};
+
+struct iommu_domain *intel_svm_domain_alloc(void)
+{
+   struct iommu_domain *domain;
+
+   domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+   if (domain)
+   domain->ops = &intel_svm_domain_ops;
+
+   return domain;
+}
-- 
2.25.1

[PATCH v6 04/12] iommu/sva: Basic data structures for SVA

2022-05-09 Thread Lu Baolu
Use the following data structures for the SVA implementation in the
IOMMU core:

- struct iommu_domain (IOMMU_DOMAIN_SVA type)
  Represents a hardware pagetable that the IOMMU hardware could use for
  SVA translation. Multiple iommu domains could be bound with an SVA mm
  and each grabs a mm_count reference on the mm so that the mm can only
  be freed after all domains have been unbound. A new mm field is added
  to struct iommu_domain and a helper is added to retrieve the mm from
  a domain pointer (a hedged sketch of such a helper follows this
  list).

- struct iommu_sva (existing)
  Represents a bond relationship between an SVA ioas and an iommu
  domain. If a bond already exists, it is reused and a reference is
  taken.

- struct dev_iommu::sva_bonds
  A pasid-indexed xarray to track the bonds created on the device.
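
A hedged sketch of such a helper; the name here is illustrative and the
actual helper in the series may differ:

/* illustrative only: retrieve the mm bound to an SVA domain */
static inline struct mm_struct *iommu_sva_domain_mm(struct iommu_domain *domain)
{
	return domain->mm;
}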

Suggested-by: Jean-Philippe Brucker 
Signed-off-by: Lu Baolu 
---
 include/linux/iommu.h | 13 +
 drivers/iommu/iommu.c |  3 +++
 2 files changed, 16 insertions(+)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index ab36244d4e94..2921e634491e 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -64,6 +64,9 @@ struct iommu_domain_geometry {
 #define __IOMMU_DOMAIN_PT  (1U << 2)  /* Domain is identity mapped   */
 #define __IOMMU_DOMAIN_DMA_FQ  (1U << 3)  /* DMA-API uses flush queue*/
 
+#define __IOMMU_DOMAIN_SHARED  (1U << 4)  /* Page table shared from CPU  */
+#define __IOMMU_DOMAIN_HOST_VA (1U << 5)  /* Host CPU virtual address */
+
 /*
  * This are the possible domain-types
  *
@@ -86,6 +89,8 @@ struct iommu_domain_geometry {
 #define IOMMU_DOMAIN_DMA_FQ(__IOMMU_DOMAIN_PAGING |\
 __IOMMU_DOMAIN_DMA_API |   \
 __IOMMU_DOMAIN_DMA_FQ)
+#define IOMMU_DOMAIN_SVA   (__IOMMU_DOMAIN_SHARED |\
+__IOMMU_DOMAIN_HOST_VA)
 
 struct iommu_domain {
unsigned type;
@@ -95,6 +100,9 @@ struct iommu_domain {
void *handler_token;
struct iommu_domain_geometry geometry;
struct iommu_dma_cookie *iova_cookie;
+#ifdef CONFIG_IOMMU_SVA
+   struct mm_struct *mm;
+#endif
 };
 
 static inline bool iommu_is_dma_domain(struct iommu_domain *domain)
@@ -380,6 +388,9 @@ struct dev_iommu {
struct iommu_device *iommu_dev;
void*priv;
unsigned intpasid_bits;
+#ifdef CONFIG_IOMMU_SVA
+   struct xarray   sva_bonds;
+#endif
 };
 
 int iommu_device_register(struct iommu_device *iommu,
@@ -629,6 +640,8 @@ struct iommu_fwspec {
  */
 struct iommu_sva {
struct device   *dev;
+   struct iommu_domain *domain;
+   refcount_t  users;
 };
 
 int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 16e8db2d86fc..1abff5fc9554 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -202,6 +202,9 @@ static struct dev_iommu *dev_iommu_get(struct device *dev)
return NULL;
 
mutex_init(¶m->lock);
+#ifdef CONFIG_IOMMU_SVA
+   xa_init(¶m->sva_bonds);
+#endif
dev->iommu = param;
return param;
 }
-- 
2.25.1

[PATCH v6 01/12] dmaengine: idxd: Separate user and kernel pasid enabling

2022-05-09 Thread Lu Baolu
From: Dave Jiang 

The idxd driver has always gated pasid enabling under a single knob,
and this assumption is incorrect. The pasid used for kernel operation
can be toggled independently and has no dependency on the user pasid
(and vice versa). Split the two so they are independent "enabled"
flags.
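
After the split, call sites check whichever capability their path
needs. An illustrative sketch (the two query helpers are added by this
patch; the setup functions are hypothetical):

/* the two capabilities are now checked independently */
if (device_pasid_enabled(idxd))
	setup_kernel_dma_with_pasid(idxd);	/* hypothetical helper */
if (device_user_pasid_enabled(idxd))
	setup_user_sva_cdev(idxd);		/* hypothetical helper */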

Cc: Vinod Koul 
Signed-off-by: Dave Jiang 
Signed-off-by: Jacob Pan 
Link: https://lore.kernel.org/linux-iommu/20220315050713.2000518-10-jacob.jun@linux.intel.com/
Signed-off-by: Lu Baolu 
---
 drivers/dma/idxd/idxd.h |  6 ++
 drivers/dma/idxd/cdev.c |  4 ++--
 drivers/dma/idxd/init.c | 30 ++
 3 files changed, 26 insertions(+), 14 deletions(-)

diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index da72eb15f610..ccbefd0be617 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -239,6 +239,7 @@ enum idxd_device_flag {
IDXD_FLAG_CONFIGURABLE = 0,
IDXD_FLAG_CMD_RUNNING,
IDXD_FLAG_PASID_ENABLED,
+   IDXD_FLAG_USER_PASID_ENABLED,
 };
 
 struct idxd_dma_dev {
@@ -469,6 +470,11 @@ static inline bool device_pasid_enabled(struct idxd_device 
*idxd)
return test_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags);
 }
 
+static inline bool device_user_pasid_enabled(struct idxd_device *idxd)
+{
+   return test_bit(IDXD_FLAG_USER_PASID_ENABLED, &idxd->flags);
+}
+
 static inline bool device_swq_supported(struct idxd_device *idxd)
 {
return (support_enqcmd && device_pasid_enabled(idxd));
diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
index b9b2b4a4124e..7df996deffbe 100644
--- a/drivers/dma/idxd/cdev.c
+++ b/drivers/dma/idxd/cdev.c
@@ -99,7 +99,7 @@ static int idxd_cdev_open(struct inode *inode, struct file 
*filp)
ctx->wq = wq;
filp->private_data = ctx;
 
-   if (device_pasid_enabled(idxd)) {
+   if (device_user_pasid_enabled(idxd)) {
sva = iommu_sva_bind_device(dev, current->mm, NULL);
if (IS_ERR(sva)) {
rc = PTR_ERR(sva);
@@ -152,7 +152,7 @@ static int idxd_cdev_release(struct inode *node, struct 
file *filep)
if (wq_shared(wq)) {
idxd_device_drain_pasid(idxd, ctx->pasid);
} else {
-   if (device_pasid_enabled(idxd)) {
+   if (device_user_pasid_enabled(idxd)) {
/* The wq disable in the disable pasid function will 
drain the wq */
rc = idxd_wq_disable_pasid(wq);
if (rc < 0)
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 993a5dcca24f..e1b5d1e4a949 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -513,16 +513,19 @@ static int idxd_probe(struct idxd_device *idxd)
 
if (IS_ENABLED(CONFIG_INTEL_IDXD_SVM) && sva) {
rc = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA);
-   if (rc == 0) {
-   rc = idxd_enable_system_pasid(idxd);
-   if (rc < 0) {
-   iommu_dev_disable_feature(dev, 
IOMMU_DEV_FEAT_SVA);
-   dev_warn(dev, "Failed to enable PASID. No SVA 
support: %d\n", rc);
-   } else {
-   set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags);
-   }
-   } else {
+   if (rc) {
+   /*
+* Do not bail here since legacy DMA is still
+* supported, both user and in-kernel DMA with
+* PASID rely on SVA feature.
+*/
dev_warn(dev, "Unable to turn on SVA feature.\n");
+   } else {
+   set_bit(IDXD_FLAG_USER_PASID_ENABLED, &idxd->flags);
+   if (idxd_enable_system_pasid(idxd))
+   dev_warn(dev, "No in-kernel DMA with PASID.\n");
+   else
+   set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags);
}
} else if (!sva) {
dev_warn(dev, "User forced SVA off via module param.\n");
@@ -561,7 +564,8 @@ static int idxd_probe(struct idxd_device *idxd)
  err:
if (device_pasid_enabled(idxd))
idxd_disable_system_pasid(idxd);
-   iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
+   if (device_user_pasid_enabled(idxd) || device_pasid_enabled(idxd))
+   iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
return rc;
 }
 
@@ -574,7 +578,8 @@ static void idxd_cleanup(struct idxd_device *idxd)
idxd_cleanup_internals(idxd);
if (device_pasid_enabled(idxd))
idxd_disable_system_pasid(idxd);
-   iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
+   if (device_user_pasid_enabled(idxd) || device_pasid_enabled(idxd))
+   iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
 }
 
 static int i

[PATCH v6 02/12] iommu: Add pasid_bits field in struct dev_iommu

2022-05-09 Thread Lu Baolu
Use this field to save the pasid/ssid bits that a device is able to
support with its IOMMU hardware. It is a generic attribute of a device,
and lifting it into the per-device dev_iommu struct makes it possible
to allocate a PASID for a device without calls into the IOMMU drivers.
Any iommu driver which supports PASID-related features should set this
field before the features are enabled on the devices.

For the initialization of this field in the VT-d driver,
info->pasid_supported is only set for PCI devices, so non-PCI SVA isn't
supported yet. Setting this field only for PCI devices has no
functional change.
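
A worked example with assumed values, for a PCI device that supports
2^20 PASIDs:

/* worked example, the values are assumptions */
int max = pci_max_pasids(pdev);         /* e.g. 1 << 20 == 1048576 */
dev->iommu->pasid_bits = fls(max) - 1;  /* fls(1 << 20) == 21, so 20 bits */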

Signed-off-by: Lu Baolu 
Reviewed-by: Jean-Philippe Brucker 
---
 include/linux/iommu.h   | 1 +
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 2 ++
 drivers/iommu/intel/iommu.c | 5 -
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 5e1afe169549..b8ffaf2cb1d0 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -373,6 +373,7 @@ struct dev_iommu {
struct iommu_fwspec *fwspec;
struct iommu_device *iommu_dev;
void*priv;
+   unsigned intpasid_bits;
 };
 
 int iommu_device_register(struct iommu_device *iommu,
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 627a3ed5ee8f..afc63fce6107 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2681,6 +2681,8 @@ static struct iommu_device *arm_smmu_probe_device(struct 
device *dev)
smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
master->stall_enabled = true;
 
+   dev->iommu->pasid_bits = master->ssid_bits;
+
return &smmu->iommu;
 
 err_free_master:
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 2990f80c5e08..99643f897f26 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4624,8 +4624,11 @@ static struct iommu_device 
*intel_iommu_probe_device(struct device *dev)
if (pasid_supported(iommu)) {
int features = pci_pasid_features(pdev);
 
-   if (features >= 0)
+   if (features >= 0) {
info->pasid_supported = features | 1;
+   dev->iommu->pasid_bits =
+   fls(pci_max_pasids(pdev)) - 1;
+   }
}
 
if (info->ats_supported && ecap_prs(iommu->ecap) &&
-- 
2.25.1

[PATCH v6 03/12] iommu: Add attach/detach_dev_pasid domain ops

2022-05-09 Thread Lu Baolu
Attaching an IOMMU domain to a PASID of a device is a generic operation
for modern IOMMU drivers which support PASID-granular DMA address
translation. Currently visible usage scenarios include (but are not
limited to):

 - SVA (Shared Virtual Address)
 - kernel DMA with PASID
 - hardware-assisted mediated device

This adds a pair of common domain ops for this purpose and adds helpers
to attach/detach a domain to/from a {device, PASID}. Some buses, like
PCI, route packets without considering the PASID value. Thus a DMA
target address with PASID might be treated as P2P if the address falls
into the MMIO BAR of other devices in the group. To make things simple,
these interfaces only apply to devices belonging to singleton groups,
and the singleton must be immutable in the fabric, i.e., not affected
by hotplug.
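
Callers are expected to use the pair symmetrically. A hedged usage
sketch:

ret = iommu_attach_device_pasid(domain, dev, pasid);
if (ret)
	return ret;	/* fails if the group is not an immutable singleton */
/* ... issue DMA tagged with the pasid ... */
iommu_detach_device_pasid(domain, dev, pasid);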

Signed-off-by: Lu Baolu 
Reviewed-by: Jean-Philippe Brucker 
---
 include/linux/iommu.h | 21 +
 drivers/iommu/iommu.c | 71 +++
 2 files changed, 92 insertions(+)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index b8ffaf2cb1d0..ab36244d4e94 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -263,6 +263,8 @@ struct iommu_ops {
  * struct iommu_domain_ops - domain specific operations
  * @attach_dev: attach an iommu domain to a device
  * @detach_dev: detach an iommu domain from a device
+ * @attach_dev_pasid: attach an iommu domain to a pasid of device
+ * @detach_dev_pasid: detach an iommu domain from a pasid of device
  * @map: map a physically contiguous memory region to an iommu domain
  * @map_pages: map a physically contiguous set of pages of the same size to
  * an iommu domain.
@@ -283,6 +285,10 @@ struct iommu_ops {
 struct iommu_domain_ops {
int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
+   int (*attach_dev_pasid)(struct iommu_domain *domain,
+   struct device *dev, ioasid_t pasid);
+   void (*detach_dev_pasid)(struct iommu_domain *domain,
+struct device *dev, ioasid_t pasid);
 
int (*map)(struct iommu_domain *domain, unsigned long iova,
   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
@@ -678,6 +684,10 @@ int iommu_group_claim_dma_owner(struct iommu_group *group, 
void *owner);
 void iommu_group_release_dma_owner(struct iommu_group *group);
 bool iommu_group_dma_owner_claimed(struct iommu_group *group);
 
+int iommu_attach_device_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid);
+void iommu_detach_device_pasid(struct iommu_domain *domain,
+  struct device *dev, ioasid_t pasid);
 #else /* CONFIG_IOMMU_API */
 
 struct iommu_ops {};
@@ -1051,6 +1061,17 @@ static inline bool iommu_group_dma_owner_claimed(struct 
iommu_group *group)
 {
return false;
 }
+
+static inline int iommu_attach_device_pasid(struct iommu_domain *domain,
+   struct device *dev, ioasid_t pasid)
+{
+   return -ENODEV;
+}
+
+static inline void iommu_detach_device_pasid(struct iommu_domain *domain,
+struct device *dev, ioasid_t pasid)
+{
+}
 #endif /* CONFIG_IOMMU_API */
 
 /**
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 29906bc16371..16e8db2d86fc 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -38,6 +38,7 @@ struct iommu_group {
struct kobject kobj;
struct kobject *devices_kobj;
struct list_head devices;
+   struct xarray pasid_array;
struct mutex mutex;
void *iommu_data;
void (*iommu_data_release)(void *iommu_data);
@@ -630,6 +631,7 @@ struct iommu_group *iommu_group_alloc(void)
mutex_init(&group->mutex);
INIT_LIST_HEAD(&group->devices);
INIT_LIST_HEAD(&group->entry);
+   xa_init(&group->pasid_array);
 
ret = ida_simple_get(&iommu_group_ida, 0, 0, GFP_KERNEL);
if (ret < 0) {
@@ -3190,3 +3192,72 @@ bool iommu_group_dma_owner_claimed(struct iommu_group 
*group)
return user;
 }
 EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed);
+
+static bool device_group_immutable_singleton(struct device *dev)
+{
+   struct iommu_group *group = iommu_group_get(dev);
+   int count;
+
+   if (!group)
+   return false;
+
+   mutex_lock(&group->mutex);
+   count = iommu_group_device_count(group);
+   mutex_unlock(&group->mutex);
+   iommu_group_put(group);
+
+   if (count != 1)
+   return false;
+
+   /*
+* The PCI device could be considered to be fully isolated if all
+* devices on the path from the device to the host-PCI bridge are
+* protected from peer-to-peer DMA by ACS.
+*/
+   if (dev_is_pci(dev))
+   return pci_acs_path_enabled(to_pci_dev(dev)

[PATCH v6 00/12] iommu: SVA and IOPF refactoring

2022-05-09 Thread Lu Baolu
Hi folks,

The first part of this series refactors the IOMMU SVA code by assigning
an SVA type of iommu_domain to a shared virtual address and replacing
the sva_bind/unbind iommu ops with attach/detach_dev_pasid domain ops.

The second part changes the existing I/O page fault handling framework
from one that only serves SVA to a generic one. Any driver or component
can handle the I/O page faults for its domain in its own way by
installing an I/O page fault handler.

This series has been functionally tested on an x86 machine and compile
tested for other architectures.

This series is also available on github:
[2] https://github.com/LuBaolu/intel-iommu/commits/iommu-sva-refactoring-v6

Please review and suggest.

Best regards,
baolu

Change log:
v6:
 - Refine the SVA basic data structures.
   Link: https://lore.kernel.org/linux-iommu/YnFv0ps0Ad8v+7uH@myrica/
 - Refine arm smmuv3 sva domain allocation.
 - Fix a possible lock issue.
   Link: https://lore.kernel.org/linux-iommu/YnFydE8j8l7Q4m+b@myrica/

v5:
 - https://lore.kernel.org/linux-iommu/20220502014842.991097-1-baolu...@linux.intel.com/
 - Address review comments from Jean-Philippe Brucker. Very appreciated!
 - Remove redundant pci aliases check in
   device_group_immutable_singleton().
 - Treat all buses except PCI as static in the immutable singleton
   check.
 - As sva_bind/unbind() already guarantee that the sva domain is freed
   only after iopf_queue_flush_dev(), remove the unnecessary domain
   refcount.
 - Move domain get() out of the list iteration in iopf_handle_group().

v4:
 - https://lore.kernel.org/linux-iommu/20220421052121.3464100-1-baolu...@linux.intel.com/
 - Solve the overlap with another series and make this series
   self-contained.
 - No objection to the abstraction of data structure during v3 review.
   Hence remove the RFC subject prefix.
 - Refine the immutable singleton group code according to Kevin's
   comments.

v3:
 - https://lore.kernel.org/linux-iommu/20220410102443.294128-1-baolu...@linux.intel.com/
 - Rework iommu_group_singleton_lockdown() by adding a flag to the group
   that positively indicates the group can never have more than one
   member, even after hot plug.
 - Abstract the data structs used for iommu sva in a separated patches to
   make it easier for review.
 - I still keep the RFC prefix in this series as above two significant
   changes need at least another round review to be finalized.
 - Several misc refinements.

v2:
 - https://lore.kernel.org/linux-iommu/20220329053800.3049561-1-baolu...@linux.intel.com/
 - Add sva domain life cycle management to avoid race between unbind and
   page fault handling.
 - Use a single domain for each mm.
 - Return a single sva handler for the same binding.
 - Add a new helper to meet singleton group requirement.
 - Rework the SVA domain allocation for arm smmu v3 driver and move the
   pasid_bit initialization to device probe.
 - Drop the patch "iommu: Handle IO page faults directly".
 - Add mmget_not_zero(mm) in SVA page fault handler.

v1:
 - https://lore.kernel.org/linux-iommu/20220320064030.2936936-1-baolu...@linux.intel.com/
 - Initial post.

Dave Jiang (1):
  dmaengine: idxd: Separate user and kernel pasid enabling

Lu Baolu (11):
  iommu: Add pasid_bits field in struct dev_iommu
  iommu: Add attach/detach_dev_pasid domain ops
  iommu/sva: Basic data structures for SVA
  iommu/vt-d: Remove SVM_FLAG_SUPERVISOR_MODE support
  iommu/vt-d: Add SVA domain support
  arm-smmu-v3/sva: Add SVA domain support
  iommu/sva: Use attach/detach_pasid_dev in SVA interfaces
  iommu: Remove SVA related callbacks from iommu ops
  iommu: Prepare IOMMU domain for IOPF
  iommu: Per-domain I/O page fault handling
  iommu: Rename iommu-sva-lib.{c,h}

 include/linux/intel-iommu.h   |   9 +-
 include/linux/iommu.h |  99 +--
 drivers/dma/idxd/idxd.h   |   6 +
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   |  19 +-
 .../iommu/{iommu-sva-lib.h => iommu-sva.h}|   3 -
 drivers/dma/idxd/cdev.c   |   4 +-
 drivers/dma/idxd/init.c   |  30 +-
 .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c   | 111 +---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   |  10 +-
 drivers/iommu/intel/iommu.c   |  12 +-
 drivers/iommu/intel/svm.c | 146 --
 drivers/iommu/io-pgfault.c|  73 +
 drivers/iommu/iommu-sva-lib.c |  71 -
 drivers/iommu/iommu-sva.c | 261 ++
 drivers/iommu/iommu.c | 193 +++--
 drivers/iommu/Makefile|   2 +-
 16 files changed, 623 insertions(+), 426 deletions(-)
 rename drivers/iommu/{iommu-sva-lib.h => iommu-sva.h} (91%)
 delete mode 100644 drivers/iommu/iommu-sva-lib.c
 create mode 100644 drivers/iommu/iommu-sva.c

-- 
2.25.1

RE: [PATCH] iommu: Reorganize __iommu_attach_group()

2022-05-09 Thread Tian, Kevin
> From: Jason Gunthorpe
> Sent: Saturday, May 7, 2022 1:55 AM
> 
> On Fri, May 06, 2022 at 05:44:11PM +0100, Robin Murphy wrote:
> >
> > So if it *is* a domain then I can call NULL->attach_dev() and...? ;)
> 
> You can call iommu_group_set_domain(group, NULL) and it will work.
> 
> As I said, it must have this symmetry:
> 
>  __iommu_group_attach_core_domain()
>  assert(__iommu_group_is_core_domain())
> 
> This is why I choose the name, because it is a clear pairing with
> __iommu_group_attach_core_domain().
> 
> How about __iommu_group_is_core_domain_attached() ? Solves the
> grammar issue

Or just __iommu_group_is_core_managed() to avoid the confusion about
whether a NULL domain is considered 'attached'? 'core managed' can
cover the NULL domain as a policy in the iommu core.

Alternatively, we can keep the current name but move the NULL domain
check out, i.e.:

assert(!group->domain || __iommu_group_is_core_domain(group));

This actually better pairs with __iommu_group_attach_core_domain()
as the latter is clearly defined for non-NULL domains:

+/*
+ * Put the group's domain back to the appropriate core-owned domain - either 
the
+ * standard kernel-mode DMA configuration or an all-DMA-blocked domain.
+ */
+static void __iommu_group_set_core_domain(struct iommu_group *group)

Thanks
Kevin

[PATCH 7/8] iommu/vt-d: Remove domain_update_iommu_snooping()

2022-05-09 Thread Lu Baolu
The IOMMU force snooping capability is no longer required to be
consistent among all the IOMMUs. Remove the force snooping capability
check from the IOMMU hot-add path; domain_update_iommu_snooping() thus
becomes dead code.

Signed-off-by: Lu Baolu 
Reviewed-by: Jason Gunthorpe 
Reviewed-by: Kevin Tian 
Link: https://lore.kernel.org/r/20220508123525.1973626-1-baolu...@linux.intel.com
---
 drivers/iommu/intel/iommu.c | 34 +-
 1 file changed, 1 insertion(+), 33 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 6ac49daa483b..e56b3a4b6998 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -533,33 +533,6 @@ static void domain_update_iommu_coherency(struct 
dmar_domain *domain)
rcu_read_unlock();
 }
 
-static bool domain_update_iommu_snooping(struct intel_iommu *skip)
-{
-   struct dmar_drhd_unit *drhd;
-   struct intel_iommu *iommu;
-   bool ret = true;
-
-   rcu_read_lock();
-   for_each_active_iommu(iommu, drhd) {
-   if (iommu != skip) {
-   /*
-* If the hardware is operating in the scalable mode,
-* the snooping control is always supported since we
-* always set PASID-table-entry.PGSNP bit if the domain
-* is managed outside (UNMANAGED).
-*/
-   if (!sm_supported(iommu) &&
-   !ecap_sc_support(iommu->ecap)) {
-   ret = false;
-   break;
-   }
-   }
-   }
-   rcu_read_unlock();
-
-   return ret;
-}
-
 static int domain_update_iommu_superpage(struct dmar_domain *domain,
 struct intel_iommu *skip)
 {
@@ -3593,12 +3566,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
iommu->name);
return -ENXIO;
}
-   if (!ecap_sc_support(iommu->ecap) &&
-   domain_update_iommu_snooping(iommu)) {
-   pr_warn("%s: Doesn't support snooping.\n",
-   iommu->name);
-   return -ENXIO;
-   }
+
sp = domain_update_iommu_superpage(NULL, iommu) - 1;
if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
pr_warn("%s: Doesn't support large page.\n",
-- 
2.25.1

[PATCH 8/8] iommu/vt-d: Remove hard coding PGSNP bit in PASID entries

2022-05-09 Thread Lu Baolu
As enforce_cache_coherency has been introduced into the
iommu_domain_ops, the kernel component which owns the iommu domain is
able to opt in to its requirement for force snooping support. The iommu
driver no longer needs to hard-code the page snoop control bit in the
PASID table entries.

Signed-off-by: Lu Baolu 
Reviewed-by: Kevin Tian 
Link: 
https://lore.kernel.org/r/20220508123525.1973626-1-baolu...@linux.intel.com
---
 drivers/iommu/intel/pasid.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index d19dd66a670c..cb4c1d0cf25c 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -710,9 +710,6 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
pasid_set_fault_enable(pte);
pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
 
-   if (domain->domain.type == IOMMU_DOMAIN_UNMANAGED)
-   pasid_set_pgsnp(pte);
-
/*
 * Since it is a second level only translation setup, we should
 * set SRE bit as well (addresses are expected to be GPAs).
-- 
2.25.1



[PATCH 6/8] iommu/vt-d: Check domain force_snooping against attached devices

2022-05-09 Thread Lu Baolu
As domain->force_snooping only impacts the devices attached to the
domain, there's no need to check against all IOMMU units. On the other
hand, force_snooping could be set on a domain whether or not it has
been attached, and once set it is an immutable flag. If no device is
attached, the operation always succeeds. Such an empty domain can then
only be attached to devices whose IOMMU supports snoop control.

Signed-off-by: Lu Baolu 
Reviewed-by: Kevin Tian 
Link: 
https://lore.kernel.org/r/20220508123525.1973626-1-baolu...@linux.intel.com
---
 include/linux/intel-iommu.h |  1 +
 drivers/iommu/intel/pasid.h |  2 ++
 drivers/iommu/intel/iommu.c | 53 ++---
 drivers/iommu/intel/pasid.c | 42 +
 4 files changed, 95 insertions(+), 3 deletions(-)

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 72e5d7900e71..4f29139bbfc3 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -540,6 +540,7 @@ struct dmar_domain {
u8 has_iotlb_device: 1;
u8 iommu_coherency: 1;  /* indicate coherency of iommu access */
u8 force_snooping : 1;  /* Create IOPTEs with snoop control */
+   u8 set_pte_snp:1;
 
struct list_head devices;   /* all devices' list */
struct iova_domain iovad;   /* iova's that belong to this domain */
diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h
index ab4408c824a5..583ea67fc783 100644
--- a/drivers/iommu/intel/pasid.h
+++ b/drivers/iommu/intel/pasid.h
@@ -123,4 +123,6 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
 bool fault_ignore);
 int vcmd_alloc_pasid(struct intel_iommu *iommu, u32 *pasid);
 void vcmd_free_pasid(struct intel_iommu *iommu, u32 pasid);
+void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu,
+ struct device *dev, u32 pasid);
 #endif /* __INTEL_PASID_H */
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index d68f5bbf3e93..6ac49daa483b 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2459,7 +2459,7 @@ static int domain_setup_first_level(struct intel_iommu *iommu,
if (level == 5)
flags |= PASID_FLAG_FL5LP;
 
-   if (domain->domain.type == IOMMU_DOMAIN_UNMANAGED)
+   if (domain->force_snooping)
flags |= PASID_FLAG_PAGE_SNOOP;
 
return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
@@ -4431,7 +4431,7 @@ static int intel_iommu_map(struct iommu_domain *domain,
prot |= DMA_PTE_READ;
if (iommu_prot & IOMMU_WRITE)
prot |= DMA_PTE_WRITE;
-   if (dmar_domain->force_snooping)
+   if (dmar_domain->set_pte_snp)
prot |= DMA_PTE_SNP;
 
max_addr = iova + size;
@@ -4554,13 +4554,60 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
return phys;
 }
 
+static bool domain_support_force_snooping(struct dmar_domain *domain)
+{
+   struct device_domain_info *info;
+   bool support = true;
+
+   assert_spin_locked(&device_domain_lock);
+   list_for_each_entry(info, &domain->devices, link) {
+   if (!ecap_sc_support(info->iommu->ecap)) {
+   support = false;
+   break;
+   }
+   }
+
+   return support;
+}
+
+static void domain_set_force_snooping(struct dmar_domain *domain)
+{
+   struct device_domain_info *info;
+
+   assert_spin_locked(&device_domain_lock);
+
+   /*
+* Second level page table supports per-PTE snoop control. The
+* iommu_map() interface will handle this by setting SNP bit.
+*/
+   if (!domain_use_first_level(domain)) {
+   domain->set_pte_snp = true;
+   return;
+   }
+
+   list_for_each_entry(info, &domain->devices, link)
+   intel_pasid_setup_page_snoop_control(info->iommu, info->dev,
+PASID_RID2PASID);
+}
+
 static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain)
 {
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+   unsigned long flags;
 
-   if (!domain_update_iommu_snooping(NULL))
+   if (dmar_domain->force_snooping)
+   return true;
+
+   spin_lock_irqsave(&device_domain_lock, flags);
+   if (!domain_support_force_snooping(dmar_domain)) {
+   spin_unlock_irqrestore(&device_domain_lock, flags);
return false;
+   }
+
+   domain_set_force_snooping(dmar_domain);
dmar_domain->force_snooping = true;
+   spin_unlock_irqrestore(&device_domain_lock, flags);
+
return true;
 }
 
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index f8d215d85695..d19dd66a670c 100644
--- a/drivers/iommu/intel/pasid.c

[PATCH 5/8] iommu/vt-d: Block force-snoop domain attaching if no SC support

2022-05-09 Thread Lu Baolu
In the attach_dev callback of the default domain ops, if force_snooping
has been set on the domain but the IOMMU hardware of the device does not
support the SC (Snoop Control) capability, the callback should block the
attach and return a corresponding error code.

Signed-off-by: Lu Baolu 
Reviewed-by: Jason Gunthorpe 
Reviewed-by: Kevin Tian 
Link: 
https://lore.kernel.org/r/20220508123525.1973626-1-baolu...@linux.intel.com
---
 drivers/iommu/intel/iommu.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index cf43e8f9091b..d68f5bbf3e93 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4354,6 +4354,9 @@ static int prepare_domain_attach_device(struct iommu_domain *domain,
if (!iommu)
return -ENODEV;
 
+   if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap))
+   return -EOPNOTSUPP;
+
/* check if this iommu agaw is sufficient for max mapped address */
addr_width = agaw_to_width(iommu->agaw);
if (addr_width > cap_mgaw(iommu->cap))
-- 
2.25.1



[PATCH 4/8] iommu/vt-d: Size Page Request Queue to avoid overflow condition

2022-05-09 Thread Lu Baolu
PRQ overflow may cause I/O throughput congestion, resulting in unnecessary
degradation of I/O performance. Appropriately increasing the length of the
PRQ can greatly reduce the occurrence of PRQ overflow. The maximum number
of page requests that can be generated in parallel by a PCIe device is
statically defined in the Outstanding Page Request Capacity field of the
PCIe ATS capability in configuration space.

The new length of the PRQ is calculated by summing the value of the
Outstanding Page Request Capacity register across all devices where Page
Requests are supported, on a real PR-capable platform (Intel Sapphire
Rapids). The result is rounded up to the nearest power of 2.

The PRQ length is also doubled because the VT-d IOMMU driver only updates
the Page Request Queue Head Register (PQH_REG) after processing the entire
queue.

Signed-off-by: Lu Baolu 
Reviewed-by: Kevin Tian 
Link: 
https://lore.kernel.org/r/20220421113558.3504874-1-baolu...@linux.intel.com
---
 include/linux/intel-svm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h
index b3b125b332aa..207ef06ba3e1 100644
--- a/include/linux/intel-svm.h
+++ b/include/linux/intel-svm.h
@@ -9,7 +9,7 @@
 #define __INTEL_SVM_H__
 
 /* Page Request Queue depth */
-#define PRQ_ORDER  2
+#define PRQ_ORDER  4
 #define PRQ_RING_MASK  ((0x1000 << PRQ_ORDER) - 0x20)
 #define PRQ_DEPTH  ((0x1000 << PRQ_ORDER) >> 5)
 
-- 
2.25.1
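
[Spelling the arithmetic out - each PRQ descriptor is 32 bytes and the
queue spans (4 KiB << PRQ_ORDER), so:

	/* Old: PRQ_ORDER 2 -> 16 KiB queue ->  512 entries */
	/* New: PRQ_ORDER 4 -> 64 KiB queue -> 2048 entries */
	#define PRQ_ORDER	4
	#define PRQ_DEPTH	((0x1000 << PRQ_ORDER) >> 5)	/* 2048 */
]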



[PATCH 3/8] iommu/vt-d: Fold dmar_insert_one_dev_info() into its caller

2022-05-09 Thread Lu Baolu
Fold dmar_insert_one_dev_info() into domain_add_dev_info() which is its
only caller.

No intentional functional impact.

Suggested-by: Christoph Hellwig 
Signed-off-by: Lu Baolu 
Reviewed-by: Christoph Hellwig 
Link: https://lore.kernel.org/r/20220416120423.879552-1-baolu...@linux.intel.com
---
 drivers/iommu/intel/iommu.c | 110 +---
 1 file changed, 51 insertions(+), 59 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index a5ca2b536ea8..cf43e8f9091b 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2473,64 +2473,6 @@ static bool dev_is_real_dma_subdevice(struct device *dev)
   pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev);
 }
 
-static int dmar_insert_one_dev_info(struct intel_iommu *iommu, int bus,
-   int devfn, struct device *dev,
-   struct dmar_domain *domain)
-{
-   struct device_domain_info *info = dev_iommu_priv_get(dev);
-   unsigned long flags;
-   int ret;
-
-   spin_lock_irqsave(&device_domain_lock, flags);
-   info->domain = domain;
-   spin_lock(&iommu->lock);
-   ret = domain_attach_iommu(domain, iommu);
-   spin_unlock(&iommu->lock);
-   if (ret) {
-   spin_unlock_irqrestore(&device_domain_lock, flags);
-   return ret;
-   }
-   list_add(&info->link, &domain->devices);
-   spin_unlock_irqrestore(&device_domain_lock, flags);
-
-   /* PASID table is mandatory for a PCI device in scalable mode. */
-   if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
-   ret = intel_pasid_alloc_table(dev);
-   if (ret) {
-   dev_err(dev, "PASID table allocation failed\n");
-   dmar_remove_one_dev_info(dev);
-   return ret;
-   }
-
-   /* Setup the PASID entry for requests without PASID: */
-   spin_lock_irqsave(&iommu->lock, flags);
-   if (hw_pass_through && domain_type_is_si(domain))
-   ret = intel_pasid_setup_pass_through(iommu, domain,
-   dev, PASID_RID2PASID);
-   else if (domain_use_first_level(domain))
-   ret = domain_setup_first_level(iommu, domain, dev,
-   PASID_RID2PASID);
-   else
-   ret = intel_pasid_setup_second_level(iommu, domain,
-   dev, PASID_RID2PASID);
-   spin_unlock_irqrestore(&iommu->lock, flags);
-   if (ret) {
-   dev_err(dev, "Setup RID2PASID failed\n");
-   dmar_remove_one_dev_info(dev);
-   return ret;
-   }
-   }
-
-   ret = domain_context_mapping(domain, dev);
-   if (ret) {
-   dev_err(dev, "Domain context map failed\n");
-   dmar_remove_one_dev_info(dev);
-   return ret;
-   }
-
-   return 0;
-}
-
 static int iommu_domain_identity_map(struct dmar_domain *domain,
 unsigned long first_vpfn,
 unsigned long last_vpfn)
@@ -2606,14 +2548,64 @@ static int __init si_domain_init(int hw)
 
 static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
 {
+   struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu;
+   unsigned long flags;
u8 bus, devfn;
+   int ret;
 
iommu = device_to_iommu(dev, &bus, &devfn);
if (!iommu)
return -ENODEV;
 
-   return dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
+   spin_lock_irqsave(&device_domain_lock, flags);
+   info->domain = domain;
+   spin_lock(&iommu->lock);
+   ret = domain_attach_iommu(domain, iommu);
+   spin_unlock(&iommu->lock);
+   if (ret) {
+   spin_unlock_irqrestore(&device_domain_lock, flags);
+   return ret;
+   }
+   list_add(&info->link, &domain->devices);
+   spin_unlock_irqrestore(&device_domain_lock, flags);
+
+   /* PASID table is mandatory for a PCI device in scalable mode. */
+   if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
+   ret = intel_pasid_alloc_table(dev);
+   if (ret) {
+   dev_err(dev, "PASID table allocation failed\n");
+   dmar_remove_one_dev_info(dev);
+   return ret;
+   }
+
+   /* Setup the PASID entry for requests without PASID: */
+   spin_lock_irqsave(&iommu->lock, flags);
+   if (hw_pass_through && domain_type_is_si(domain))
+   ret = intel_pasid_setup_pass_through(iommu, domain,
+   dev, PASID_RID2PASID);
+   

[PATCH 2/8] iommu/vt-d: Change return type of dmar_insert_one_dev_info()

2022-05-09 Thread Lu Baolu
dmar_insert_one_dev_info() returns the passed-in domain on success and
NULL on failure. This doesn't make much sense. Change the return type to
an integer.

Signed-off-by: Lu Baolu 
Reviewed-by: Christoph Hellwig 
Link: https://lore.kernel.org/r/20220416120423.879552-1-baolu...@linux.intel.com
---
 drivers/iommu/intel/iommu.c | 27 +++
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 626c2927344f..a5ca2b536ea8 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2473,10 +2473,9 @@ static bool dev_is_real_dma_subdevice(struct device *dev)
   pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev);
 }
 
-static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
-   int bus, int devfn,
-   struct device *dev,
-   struct dmar_domain *domain)
+static int dmar_insert_one_dev_info(struct intel_iommu *iommu, int bus,
+   int devfn, struct device *dev,
+   struct dmar_domain *domain)
 {
struct device_domain_info *info = dev_iommu_priv_get(dev);
unsigned long flags;
@@ -2489,7 +2488,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
spin_unlock(&iommu->lock);
if (ret) {
spin_unlock_irqrestore(&device_domain_lock, flags);
-   return NULL;
+   return ret;
}
list_add(&info->link, &domain->devices);
spin_unlock_irqrestore(&device_domain_lock, flags);
@@ -2500,7 +2499,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
if (ret) {
dev_err(dev, "PASID table allocation failed\n");
dmar_remove_one_dev_info(dev);
-   return NULL;
+   return ret;
}
 
/* Setup the PASID entry for requests without PASID: */
@@ -2518,17 +2517,18 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
if (ret) {
dev_err(dev, "Setup RID2PASID failed\n");
dmar_remove_one_dev_info(dev);
-   return NULL;
+   return ret;
}
}
 
-   if (domain_context_mapping(domain, dev)) {
+   ret = domain_context_mapping(domain, dev);
+   if (ret) {
dev_err(dev, "Domain context map failed\n");
dmar_remove_one_dev_info(dev);
-   return NULL;
+   return ret;
}
 
-   return domain;
+   return 0;
 }
 
 static int iommu_domain_identity_map(struct dmar_domain *domain,
@@ -2606,7 +2606,6 @@ static int __init si_domain_init(int hw)
 
 static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
 {
-   struct dmar_domain *ndomain;
struct intel_iommu *iommu;
u8 bus, devfn;
 
@@ -2614,11 +2613,7 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
if (!iommu)
return -ENODEV;
 
-   ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
-   if (ndomain != domain)
-   return -EBUSY;
-
-   return 0;
+   return dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
 }
 
 static bool device_has_rmrr(struct device *dev)
-- 
2.25.1



[PATCH 0/8] [PULL REQUEST] Intel IOMMU updates for Linux v5.19

2022-05-09 Thread Lu Baolu
Hi Joerg,

This includes patches queued for v5.19. It includes:

 - Domain force snooping improvement.
 - Cleanups, no intentional functional changes.

Please consider them for v5.19.

[This series cannot be directly applied to vt-d branch. Some domain
 force snooping patches have been merged on the core branch. You may
 also need to add those patches to the vt-d branch, or just merge
 this series into the core branch.]

Best regards,
Baolu

Lu Baolu (7):
  iommu/vt-d: Change return type of dmar_insert_one_dev_info()
  iommu/vt-d: Fold dmar_insert_one_dev_info() into its caller
  iommu/vt-d: Size Page Request Queue to avoid overflow condition
  iommu/vt-d: Block force-snoop domain attaching if no SC support
  iommu/vt-d: Check domain force_snooping against attached devices
  iommu/vt-d: Remove domain_update_iommu_snooping()
  iommu/vt-d: Remove hard coding PGSNP bit in PASID entries

Muhammad Usama Anjum (1):
  iommu/vt-d: Remove unneeded validity check on dev

 include/linux/intel-iommu.h |   1 +
 include/linux/intel-svm.h   |   2 +-
 drivers/iommu/intel/pasid.h |   2 +
 drivers/iommu/intel/iommu.c | 201 ++--
 drivers/iommu/intel/pasid.c |  45 +++-
 5 files changed, 149 insertions(+), 102 deletions(-)

-- 
2.25.1



[PATCH 1/8] iommu/vt-d: Remove unneeded validity check on dev

2022-05-09 Thread Lu Baolu
From: Muhammad Usama Anjum 

dev_iommu_priv_get(), which dereferences dev, is used at the top of this
function, so dev cannot be NULL after that point. Remove the validity
check on dev and simplify the code.

Signed-off-by: Muhammad Usama Anjum 
Link: 
https://lore.kernel.org/r/20220313150337.593650-1-usama.an...@collabora.com
Signed-off-by: Lu Baolu 
---
 drivers/iommu/intel/iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 2990f80c5e08..626c2927344f 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2522,7 +2522,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
}
}
 
-   if (dev && domain_context_mapping(domain, dev)) {
+   if (domain_context_mapping(domain, dev)) {
dev_err(dev, "Domain context map failed\n");
dmar_remove_one_dev_info(dev);
return NULL;
-- 
2.25.1



RE: [PATCH RFC 00/19] IOMMUFD Dirty Tracking

2022-05-09 Thread Tian, Kevin
> From: Jason Gunthorpe 
> Sent: Friday, May 6, 2022 7:46 PM
> 
> On Fri, May 06, 2022 at 03:51:40AM +, Tian, Kevin wrote:
> > > From: Jason Gunthorpe 
> > > Sent: Thursday, May 5, 2022 10:08 PM
> > >
> > > On Thu, May 05, 2022 at 07:40:37AM +, Tian, Kevin wrote:
> > >
> > > > In concept this is an iommu property instead of a domain property.
> > >
> > > Not really, domains shouldn't be changing behaviors once they are
> > > created. If a domain supports dirty tracking and I attach a new device
> > > then it still must support dirty tracking.
> >
> > That sort of suggests that userspace should specify whether a domain
> > supports dirty tracking when it's created. But how does userspace
> > know that it should create the domain in this way in the first place?
> > live migration is triggered on demand and it may not happen in the
> > lifetime of a VM.
> 
> The best you could do is to look at the devices being plugged in at VM
> startup, and if they all support live migration then request dirty
> tracking, otherwise don't.

Yes, this is how a device capability can help.

> 
> However, it costs nothing to have dirty tracking as long as all iommus
> support it in the system - which seems to be the normal case today.
> 
> We should just always turn it on at this point.

Then we still need a way to report "all iommus in the system support it"
to userspace, since many old systems don't support it at all. If we all
agree that a device capability flag would be helpful on this front (as
you also said below), we can probably start building the initial
skeleton with that in mind?
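
Something like the below, perhaps - the helper and the cap bit are
purely hypothetical names, nothing like this exists yet:

	/*
	 * Hypothetical sketch: a per-device capability so a VMM can
	 * decide at VM startup whether to request dirty tracking.
	 */
	if (device_iommu_capable(dev, IOMMU_CAP_DIRTY_TRACKING))
		want_dirty_tracking = true;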

> 
> > and if the user always creates domain to allow dirty tracking by default,
> > how does it know a failed attach is due to missing dirty tracking support
> > by the IOMMU and then creates another domain which disables dirty
> > tracking and retry-attach again?
> 
> The automatic logic is complicated for sure, if you had a device flag
> it would have to figure it out that way
> 

Yes. That is the model in my mind.

Thanks
Kevin


RE: [PATCH] iommu/vt-d: Increase DMAR_UNITS_SUPPORTED

2022-05-09 Thread Tian, Kevin
> From: Steve Wahl 
> Sent: Friday, May 6, 2022 11:26 PM
> 
> On Fri, May 06, 2022 at 08:12:11AM +, Tian, Kevin wrote:
> > > From: David Woodhouse 
> > > Sent: Friday, May 6, 2022 3:17 PM
> > >
> > > On Fri, 2022-05-06 at 06:49 +, Tian, Kevin wrote:
> > > > > From: Baolu Lu 
> > > > >
> > > > > > --- a/include/linux/dmar.h
> > > > > > +++ b/include/linux/dmar.h
> > > > > > @@ -19,7 +19,7 @@
> > > > > >   struct acpi_dmar_header;
> > > > > >
> > > > > >   #ifdef CONFIG_X86
> > > > > > -# define   DMAR_UNITS_SUPPORTED MAX_IO_APICS
> > > > > > +# define   DMAR_UNITS_SUPPORTED 640
> > > > > >   #else
> > > > > >   # define  DMAR_UNITS_SUPPORTED 64
> > > > > >   #endif
> > > >
> > > > ... is it necessary to permanently do a 10x increase, which wastes
> > > > memory on most platforms that won't have such a need.
> > >
> > > I was just looking at that. It mostly adds about 3½ KiB to each struct
> > > dmar_domain.
> > >
> > > I think the only actual static array is the dmar_seq_ids bitmap which
> > > grows to 640 *bits* which is fairly negligible, and the main growth is
> > > that it adds about 3½ KiB to each struct dmar_domain for the
> > > iommu_refcnt[] and iommu_did[] arrays.
> >
> > Thanks for the quick experiment! though the added material is
> > negligible it's cleaner to me if having a way to configure it as
> > discussed below.
> >
> > >
> > > > Does it make more sense to have a configurable approach similar to
> > > > CONFIG_NR_CPUS? or even better can we just replace those static
> > > > arrays with dynamic allocation so removing this restriction
> > > > completely?
> > >
> > > Hotplug makes that fun, but I suppose you only need to grow the array
> > > in a given struct dmar_domain if you actually add a device to it that's
> > > behind a newly added IOMMU. I don't know if the complexity of making it
> > > fully dynamic is worth it though. We could make it a config option,
> > > and/or a command line option (perhaps automatically derived from
> > > CONFIG_NR_CPUS).
> >
> > either config option or command line option is OK to me. Probably
> > the former is simpler given no need to dynamically expand the
> > static array. btw though deriving from CONFIG_NR_CPUS could work
> > in this case it is unclear why tying the two together is necessary in
> > concept, e.g. is there guarantee that the number of IOMMUs must
> > be smaller than the number of CPUs in a platform?
> >
> > >
> > > If it wasn't for hotplug, I think we'd know the right number by the
> > > time we actually need it anyway, wouldn't we? Can we have a heuristic
> > > for how many DMAR units are likely to be hotplugged? Is it as simple as
> > > the ratio of present to not-yet-present CPUs in MADT?
> >
> > Probably. But I don't have enough knowledge on DMAR hotplug to
> > judge (e.g. whether it's strictly tied to CPU hotplug and if yes whether
> > there could be multiple IOMMUs hotplugged together with a CPU
> > socket)...
> >
> > Thanks
> > Kevin
> 
> Would anyone be more comfortable if we only increase the limit where
> MAXSMP is set?
> 
> i.e.
> 
> #if defined(CONFIG_X86) && defined(CONFIG_MAXSMP)
> # define  DMAR_UNITS_SUPPORTED 640
> #elif defined(CONFIG_X86)
> # define  DMAR_UNITS_SUPPORTED MAX_IO_APICS
> #else
> # define  DMAR_UNITS_SUPPORTED 64
> #endif
> 
> Thank you all for your time looking at this.
> 

This works for your own configuration, but it's unclear whether other
MAXSMP platforms have the exact same requirements (different number of
sockets, different ratio of #iommus/#sockets, etc.). In any case, since
we are at it, having a generic way to extend it makes more sense to me.
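
E.g. something along these lines - CONFIG_DMAR_UNITS_SUPPORTED is a
made-up Kconfig knob here, just to sketch the idea:

	#ifdef CONFIG_DMAR_UNITS_SUPPORTED
	# define DMAR_UNITS_SUPPORTED	CONFIG_DMAR_UNITS_SUPPORTED
	#elif defined(CONFIG_X86)
	# define DMAR_UNITS_SUPPORTED	MAX_IO_APICS
	#else
	# define DMAR_UNITS_SUPPORTED	64
	#endif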

Thanks
Kevin


Re: [PATCH v4 2/4] iommu/vt-d: Check domain force_snooping against attached devices

2022-05-09 Thread Baolu Lu

On 2022/5/10 08:51, Tian, Kevin wrote:

From: Lu Baolu 
Sent: Sunday, May 8, 2022 8:35 PM

As domain->force_snooping only impacts the devices attached to the
domain, there's no need to check against all IOMMU units. On the other
hand, force_snooping could be set on a domain whether or not it has
been attached, and once set it is an immutable flag. If no device is
attached, the operation always succeeds. Such an empty domain can then
only be attached to devices whose IOMMU supports snoop control.

Signed-off-by: Lu Baolu 


Reviewed-by: Kevin Tian 


Thank you, Kevin. I will queue this series for v5.19.

Best regards,
baolu




---
  include/linux/intel-iommu.h |  1 +
  drivers/iommu/intel/pasid.h |  2 ++
  drivers/iommu/intel/iommu.c | 53
++---
  drivers/iommu/intel/pasid.c | 42 +
  4 files changed, 95 insertions(+), 3 deletions(-)

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 72e5d7900e71..4f29139bbfc3 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -540,6 +540,7 @@ struct dmar_domain {
u8 has_iotlb_device: 1;
u8 iommu_coherency: 1;  /* indicate coherency of
iommu access */
u8 force_snooping : 1;  /* Create IOPTEs with snoop control
*/
+   u8 set_pte_snp:1;

struct list_head devices;   /* all devices' list */
struct iova_domain iovad;   /* iova's that belong to this domain
*/
diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h
index ab4408c824a5..583ea67fc783 100644
--- a/drivers/iommu/intel/pasid.h
+++ b/drivers/iommu/intel/pasid.h
@@ -123,4 +123,6 @@ void intel_pasid_tear_down_entry(struct
intel_iommu *iommu,
 bool fault_ignore);
  int vcmd_alloc_pasid(struct intel_iommu *iommu, u32 *pasid);
  void vcmd_free_pasid(struct intel_iommu *iommu, u32 pasid);
+void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu,
+ struct device *dev, u32 pasid);
  #endif /* __INTEL_PASID_H */
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index b4802f4055a0..048ebfbd5fcb 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2459,7 +2459,7 @@ static int domain_setup_first_level(struct
intel_iommu *iommu,
if (level == 5)
flags |= PASID_FLAG_FL5LP;

-   if (domain->domain.type == IOMMU_DOMAIN_UNMANAGED)
+   if (domain->force_snooping)
flags |= PASID_FLAG_PAGE_SNOOP;

return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
@@ -,7 +,7 @@ static int intel_iommu_map(struct iommu_domain
*domain,
prot |= DMA_PTE_READ;
if (iommu_prot & IOMMU_WRITE)
prot |= DMA_PTE_WRITE;
-   if (dmar_domain->force_snooping)
+   if (dmar_domain->set_pte_snp)
prot |= DMA_PTE_SNP;

max_addr = iova + size;
@@ -4567,13 +4567,60 @@ static phys_addr_t
intel_iommu_iova_to_phys(struct iommu_domain *domain,
return phys;
  }

+static bool domain_support_force_snooping(struct dmar_domain *domain)
+{
+   struct device_domain_info *info;
+   bool support = true;
+
+   assert_spin_locked(&device_domain_lock);
+   list_for_each_entry(info, &domain->devices, link) {
+   if (!ecap_sc_support(info->iommu->ecap)) {
+   support = false;
+   break;
+   }
+   }
+
+   return support;
+}
+
+static void domain_set_force_snooping(struct dmar_domain *domain)
+{
+   struct device_domain_info *info;
+
+   assert_spin_locked(&device_domain_lock);
+
+   /*
+* Second level page table supports per-PTE snoop control. The
+* iommu_map() interface will handle this by setting SNP bit.
+*/
+   if (!domain_use_first_level(domain)) {
+   domain->set_pte_snp = true;
+   return;
+   }
+
+   list_for_each_entry(info, &domain->devices, link)
+   intel_pasid_setup_page_snoop_control(info->iommu, info->dev,
+    PASID_RID2PASID);
+}
+
  static bool intel_iommu_enforce_cache_coherency(struct iommu_domain
*domain)
  {
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+   unsigned long flags;

-   if (!domain_update_iommu_snooping(NULL))
+   if (dmar_domain->force_snooping)
+   return true;
+
+   spin_lock_irqsave(&device_domain_lock, flags);
+   if (!domain_support_force_snooping(dmar_domain)) {
+   spin_unlock_irqrestore(&device_domain_lock, flags);
return false;
+   }
+
+   domain_set_force_snooping(dmar_domain);
dmar_domain->force_snooping = true;
+   spin_unlock_irqrestore(&device_domain_lock, flags);
+
return true;
  }

diff --git a/drivers/iommu/intel/pasid.c b/drive

RE: [PATCH v3] iommu: iommu_group_claim_dma_owner() must always assign a domain

2022-05-09 Thread Tian, Kevin
> From: Jason Gunthorpe 
> Sent: Tuesday, May 10, 2022 12:19 AM
> 
> Once the group enters 'owned' mode it can never be assigned back to the
> default_domain or to a NULL domain. It must always be actively assigned to
> a current domain. If the caller hasn't provided a domain then the core
> must provide an explicit DMA blocking domain that has no DMA map.
> 
> Lazily create a group-global blocking DMA domain when
> iommu_group_claim_dma_owner is first called and immediately assign the
> group to it. This ensures that DMA is immediately fully isolated on all
> IOMMU drivers.
> 
> If the user attaches/detaches while owned then detach will set the group
> back to the blocking domain.
> 
> Slightly reorganize the call chains so that
> __iommu_group_set_core_domain() is the function that removes any caller
> configured domain and sets the domains back a core owned domain with an
> appropriate lifetime.
> 
> __iommu_group_set_domain() is the worker function that can change the
> domain assigned to a group to any target domain, including NULL.
> 
> Add comments clarifying how the NULL vs detach_dev vs default_domain
> works
> based on Robin's remarks.
> 
> This fixes an oops with VFIO and SMMUv3 because VFIO will call
> iommu_detach_group() and then immediately iommu_domain_free(), but
> SMMUv3 has no way to know that the domain it is holding a pointer to
> has been freed. Now the iommu_detach_group() will assign the blocking
> domain and SMMUv3 will no longer hold a stale domain reference.
> 
> Fixes: 1ea2a07a532b ("iommu: Add DMA ownership management
> interfaces")
> Reported-by: Qian Cai 
> Tested-by: Baolu Lu 
> Tested-by: Nicolin Chen 
> Co-developed-by: Robin Murphy 
> Signed-off-by: Robin Murphy 
> Signed-off-by: Jason Gunthorpe 

Reviewed-by: Kevin Tian 

> --
> 
> Just minor polishing as discussed
> 
> v3:
>  - Change names to __iommu_group_set_domain() /
>__iommu_group_set_core_domain()
>  - Clarify comments
>  - Call __iommu_group_set_domain() directly in
>iommu_group_release_dma_owner() since we know it is always selecting
>the default_domain
> v2: https://lore.kernel.org/r/0-v2-f62259511ac0+6-
> iommu_dma_block_...@nvidia.com
>  - Remove redundant detach_dev ops check in __iommu_detach_device and
>make the added WARN_ON fail instead
>  - Check for blocking_domain in __iommu_attach_group() so VFIO can
>actually attach a new group
>  - Update comments and spelling
>  - Fix missed change to new_domain in iommu_group_do_detach_device()
> v1: https://lore.kernel.org/r/0-v1-6e9d2d0a759d+11b-
> iommu_dma_block_...@nvidia.com
> 
> ---
>  drivers/iommu/iommu.c | 127 ++
>  1 file changed, 91 insertions(+), 36 deletions(-)
> 
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 0c42ece2585406..0b22e51e90f416 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -44,6 +44,7 @@ struct iommu_group {
>   char *name;
>   int id;
>   struct iommu_domain *default_domain;
> + struct iommu_domain *blocking_domain;
>   struct iommu_domain *domain;
>   struct list_head entry;
>   unsigned int owner_cnt;
> @@ -82,8 +83,8 @@ static int __iommu_attach_device(struct
> iommu_domain *domain,
>struct device *dev);
>  static int __iommu_attach_group(struct iommu_domain *domain,
>   struct iommu_group *group);
> -static void __iommu_detach_group(struct iommu_domain *domain,
> -  struct iommu_group *group);
> +static int __iommu_group_set_domain(struct iommu_group *group,
> + struct iommu_domain *new_domain);
>  static int iommu_create_device_direct_mappings(struct iommu_group
> *group,
>  struct device *dev);
>  static struct iommu_group *iommu_group_get_for_dev(struct device *dev);
> @@ -596,6 +597,8 @@ static void iommu_group_release(struct kobject
> *kobj)
> 
>   if (group->default_domain)
>   iommu_domain_free(group->default_domain);
> + if (group->blocking_domain)
> + iommu_domain_free(group->blocking_domain);
> 
>   kfree(group->name);
>   kfree(group);
> @@ -1907,6 +1910,24 @@ void iommu_domain_free(struct iommu_domain
> *domain)
>  }
>  EXPORT_SYMBOL_GPL(iommu_domain_free);
> 
> +/*
> + * Put the group's domain back to the appropriate core-owned domain - either the
> + * standard kernel-mode DMA configuration or an all-DMA-blocked domain.
> + */
> +static void __iommu_group_set_core_domain(struct iommu_group *group)
> +{
> + struct iommu_domain *new_domain;
> + int ret;
> +
> + if (group->owner)
> + new_domain = group->blocking_domain;
> + else
> + new_domain = group->default_domain;
> +
> + ret = __iommu_group_set_domain(group, new_domain);
> + WARN(ret, "iommu driver failed to attach the default/blocking
> domain");
> +}
> +
>  static int __iommu_atta

RE: [PATCH v4 2/4] iommu/vt-d: Check domain force_snooping against attached devices

2022-05-09 Thread Tian, Kevin
> From: Lu Baolu 
> Sent: Sunday, May 8, 2022 8:35 PM
> 
> As domain->force_snooping only impacts the devices attached to the
> domain, there's no need to check against all IOMMU units. On the other
> hand, force_snooping could be set on a domain whether or not it has
> been attached, and once set it is an immutable flag. If no device is
> attached, the operation always succeeds. Such an empty domain can then
> only be attached to devices whose IOMMU supports snoop control.
> 
> Signed-off-by: Lu Baolu 

Reviewed-by: Kevin Tian 

> ---
>  include/linux/intel-iommu.h |  1 +
>  drivers/iommu/intel/pasid.h |  2 ++
>  drivers/iommu/intel/iommu.c | 53
> ++---
>  drivers/iommu/intel/pasid.c | 42 +
>  4 files changed, 95 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
> index 72e5d7900e71..4f29139bbfc3 100644
> --- a/include/linux/intel-iommu.h
> +++ b/include/linux/intel-iommu.h
> @@ -540,6 +540,7 @@ struct dmar_domain {
>   u8 has_iotlb_device: 1;
>   u8 iommu_coherency: 1;  /* indicate coherency of
> iommu access */
>   u8 force_snooping : 1;  /* Create IOPTEs with snoop control
> */
> + u8 set_pte_snp:1;
> 
>   struct list_head devices;   /* all devices' list */
>   struct iova_domain iovad;   /* iova's that belong to this domain
> */
> diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h
> index ab4408c824a5..583ea67fc783 100644
> --- a/drivers/iommu/intel/pasid.h
> +++ b/drivers/iommu/intel/pasid.h
> @@ -123,4 +123,6 @@ void intel_pasid_tear_down_entry(struct
> intel_iommu *iommu,
>bool fault_ignore);
>  int vcmd_alloc_pasid(struct intel_iommu *iommu, u32 *pasid);
>  void vcmd_free_pasid(struct intel_iommu *iommu, u32 pasid);
> +void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu,
> +   struct device *dev, u32 pasid);
>  #endif /* __INTEL_PASID_H */
> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
> index b4802f4055a0..048ebfbd5fcb 100644
> --- a/drivers/iommu/intel/iommu.c
> +++ b/drivers/iommu/intel/iommu.c
> @@ -2459,7 +2459,7 @@ static int domain_setup_first_level(struct
> intel_iommu *iommu,
>   if (level == 5)
>   flags |= PASID_FLAG_FL5LP;
> 
> - if (domain->domain.type == IOMMU_DOMAIN_UNMANAGED)
> + if (domain->force_snooping)
>   flags |= PASID_FLAG_PAGE_SNOOP;
> 
>   return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
> @@ -,7 +,7 @@ static int intel_iommu_map(struct iommu_domain
> *domain,
>   prot |= DMA_PTE_READ;
>   if (iommu_prot & IOMMU_WRITE)
>   prot |= DMA_PTE_WRITE;
> - if (dmar_domain->force_snooping)
> + if (dmar_domain->set_pte_snp)
>   prot |= DMA_PTE_SNP;
> 
>   max_addr = iova + size;
> @@ -4567,13 +4567,60 @@ static phys_addr_t
> intel_iommu_iova_to_phys(struct iommu_domain *domain,
>   return phys;
>  }
> 
> +static bool domain_support_force_snooping(struct dmar_domain *domain)
> +{
> + struct device_domain_info *info;
> + bool support = true;
> +
> + assert_spin_locked(&device_domain_lock);
> + list_for_each_entry(info, &domain->devices, link) {
> + if (!ecap_sc_support(info->iommu->ecap)) {
> + support = false;
> + break;
> + }
> + }
> +
> + return support;
> +}
> +
> +static void domain_set_force_snooping(struct dmar_domain *domain)
> +{
> + struct device_domain_info *info;
> +
> + assert_spin_locked(&device_domain_lock);
> +
> + /*
> +  * Second level page table supports per-PTE snoop control. The
> +  * iommu_map() interface will handle this by setting SNP bit.
> +  */
> + if (!domain_use_first_level(domain)) {
> + domain->set_pte_snp = true;
> + return;
> + }
> +
> + list_for_each_entry(info, &domain->devices, link)
> + intel_pasid_setup_page_snoop_control(info->iommu, info->dev,
> +  PASID_RID2PASID);
> +}
> +
>  static bool intel_iommu_enforce_cache_coherency(struct iommu_domain
> *domain)
>  {
>   struct dmar_domain *dmar_domain = to_dmar_domain(domain);
> + unsigned long flags;
> 
> - if (!domain_update_iommu_snooping(NULL))
> + if (dmar_domain->force_snooping)
> + return true;
> +
> + spin_lock_irqsave(&device_domain_lock, flags);
> + if (!domain_support_force_snooping(dmar_domain)) {
> + spin_unlock_irqrestore(&device_domain_lock, flags);
>   return false;
> + }
> +
> + domain_set_force_snooping(dmar_domain);
>   dmar_domain->force_snooping = true;
> + spin_unlock_irqrestore(&device_domain_lock, flags);
> +
>   return true;
>  }
> 
> diff --git a/drivers/iommu/

Re: [PATCH v2] iommu: iommu_group_claim_dma_owner() must always assign a domain

2022-05-09 Thread Jason Gunthorpe via iommu
On Mon, May 09, 2022 at 11:06:35PM +0100, Robin Murphy wrote:
> The main thing drivers need to do for flush queue support is to actually
> implement the optimisations which make it worthwhile. It's up to individual
> drivers how much use they want to make of the iommu_iotlb_gather mechanism,
> and they're free to issue invalidations or even enforce their completion
> directly within their unmap callback if they so wish. In such cases,
> enabling a flush queue would do nothing but hurt performance and waste
> memory.

It makes sense, but I think at this point it would be clearer to have
a domain->ops flag saying whether the domain benefits from deferred
flush, rather than overloading the type.

> > But then what happens? This driver has no detach_dev so after, say
> > VFIO does stuff, how do we get back into whatever boot-time state NULL
> > represents?
> 
> Shhh... the main point of the arm-smmu legacy binding support is to do
> whatever the handful of people still using ancient AMD Seattle boards with
> original firmware need to do. Clearly they haven't been reassigning devices
> straight back from VFIO to a kernel driver for the last 6-and-a-bit years
> since that's been broken, so I'm now discounting it as a supported
> legacy-binding-use-case. Don't give them ideas! ;)

If everything is converted to use a default domain, what will this
return? NULL can't be a valid default domain.
 
> > It feels like this really wants a 'IOMMU_DOMAIN_PLATFORM_DMA_OPS' set
> > as the default_domain meaning that when that domain is assigned, the
> > platform's DMA ops are handling the iommu? If I get it right..
> 
> Nah, we just need to actually finish introducing default domains. I'm OK to
> tackle the remaining SoC IOMMU drivers once my bus ops work meanders into
> the arch/arm iommu-dma conversion revival; it just needs people who
> understand s390 and fsl-pamu well enough to at least (presumably) bodge up
> enough IOMMU_DOMAIN_IDENTITY support to replicate their current "detached"

Hum. Looking at s390, it is similar I think to smmu - when NULL they
expect their platform dma_ops to work in arch/s390/pci/pci_dma.c, which
looks to me like a version of the normal iommu support through
IOMMU_DOMAIN_DMA. The good conversion is probably to move s390 to use
the normal DMA API; the lame one is to use an
'IOMMU_DOMAIN_PLATFORM_DMA_OPS' and save it for another day.

fsl took some looking at, but it looks very much like IDENTITY. The
defconfig does not set CONFIG_DMA_OPS and there is no dma_ops code
around its arch support - so it follows the direct path. It seems like
there might be an offset though.

The two tegras look OK, they both look like blocking domains. Thierry
has been helpful with tegra stuff in the past.

Someone who cared about fsl tested a VFIO patch of mine last year, and
the s390 team is actively looking at nested domains right now.

> behaviours and force CONFIG_IOMMU_DEFAULT_PASSTHROUGH on those
> architectures.

Or should the iommu driver return IDENTITY from its ops
def_domain_type()?
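
i.e. a minimal sketch, with fsl-pamu only as the assumed example:

	static int fsl_pamu_def_domain_type(struct device *dev)
	{
		/*
		 * Ask the core for an identity default domain instead of
		 * relying on a NULL domain meaning "bypass".
		 */
		return IOMMU_DOMAIN_IDENTITY;
	}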

Jason


Re: [PATCH v3] iommu: iommu_group_claim_dma_owner() must always assign a domain

2022-05-09 Thread Robin Murphy

On 2022-05-09 17:19, Jason Gunthorpe wrote:

Once the group enters 'owned' mode it can never be assigned back to the
default_domain or to a NULL domain. It must always be actively assigned to
a current domain. If the caller hasn't provided a domain then the core
must provide an explicit DMA blocking domain that has no DMA map.

Lazily create a group-global blocking DMA domain when
iommu_group_claim_dma_owner is first called and immediately assign the
group to it. This ensures that DMA is immediately fully isolated on all
IOMMU drivers.

If the user attaches/detaches while owned then detach will set the group
back to the blocking domain.

Slightly reorganize the call chains so that
__iommu_group_set_core_domain() is the function that removes any caller
configured domain and sets the domains back a core owned domain with an
appropriate lifetime.

__iommu_group_set_domain() is the worker function that can change the
domain assigned to a group to any target domain, including NULL.

Add comments clarifying how the NULL vs detach_dev vs default_domain works
based on Robin's remarks.

This fixes an oops with VFIO and SMMUv3 because VFIO will call
iommu_detach_group() and then immediately iommu_domain_free(), but
SMMUv3 has no way to know that the domain it is holding a pointer to
has been freed. Now the iommu_detach_group() will assign the blocking
domain and SMMUv3 will no longer hold a stale domain reference.


Thanks Jason, from my PoV this looks great now - I still think it feels 
too silly to give a formal review tag for a patch with my own sign-off, 
so this is just my ephemeral "let's get this branch back in -next ASAP 
and hope nothing else shakes loose" :)


Cheers,
Robin.


Fixes: 1ea2a07a532b ("iommu: Add DMA ownership management interfaces")
Reported-by: Qian Cai 
Tested-by: Baolu Lu 
Tested-by: Nicolin Chen 
Co-developed-by: Robin Murphy 
Signed-off-by: Robin Murphy 
Signed-off-by: Jason Gunthorpe 
--

Just minor polishing as discussed

v3:
  - Change names to __iommu_group_set_domain() /
__iommu_group_set_core_domain()
  - Clarify comments
  - Call __iommu_group_set_domain() directly in
iommu_group_release_dma_owner() since we know it is always selecting
the default_domain
v2: https://lore.kernel.org/r/0-v2-f62259511ac0+6-iommu_dma_block_...@nvidia.com
  - Remove redundant detach_dev ops check in __iommu_detach_device and
make the added WARN_ON fail instead
  - Check for blocking_domain in __iommu_attach_group() so VFIO can
actually attach a new group
  - Update comments and spelling
  - Fix missed change to new_domain in iommu_group_do_detach_device()
v1: 
https://lore.kernel.org/r/0-v1-6e9d2d0a759d+11b-iommu_dma_block_...@nvidia.com

---
  drivers/iommu/iommu.c | 127 ++
  1 file changed, 91 insertions(+), 36 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 0c42ece2585406..0b22e51e90f416 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -44,6 +44,7 @@ struct iommu_group {
char *name;
int id;
struct iommu_domain *default_domain;
+   struct iommu_domain *blocking_domain;
struct iommu_domain *domain;
struct list_head entry;
unsigned int owner_cnt;
@@ -82,8 +83,8 @@ static int __iommu_attach_device(struct iommu_domain *domain,
 struct device *dev);
  static int __iommu_attach_group(struct iommu_domain *domain,
struct iommu_group *group);
-static void __iommu_detach_group(struct iommu_domain *domain,
-struct iommu_group *group);
+static int __iommu_group_set_domain(struct iommu_group *group,
+   struct iommu_domain *new_domain);
  static int iommu_create_device_direct_mappings(struct iommu_group *group,
   struct device *dev);
  static struct iommu_group *iommu_group_get_for_dev(struct device *dev);
@@ -596,6 +597,8 @@ static void iommu_group_release(struct kobject *kobj)
  
  	if (group->default_domain)

iommu_domain_free(group->default_domain);
+   if (group->blocking_domain)
+   iommu_domain_free(group->blocking_domain);
  
  	kfree(group->name);

kfree(group);
@@ -1907,6 +1910,24 @@ void iommu_domain_free(struct iommu_domain *domain)
  }
  EXPORT_SYMBOL_GPL(iommu_domain_free);
  
+/*

+ * Put the group's domain back to the appropriate core-owned domain - either the
+ * standard kernel-mode DMA configuration or an all-DMA-blocked domain.
+ */
+static void __iommu_group_set_core_domain(struct iommu_group *group)
+{
+   struct iommu_domain *new_domain;
+   int ret;
+
+   if (group->owner)
+   new_domain = group->blocking_domain;
+   else
+   new_domain = group->default_domain;
+
+   ret = __iommu_group_set_domain(group, new_domain);
+   WARN(ret, "iommu driver failed to attach the default/b

Re: [PATCH v2] iommu: iommu_group_claim_dma_owner() must always assign a domain

2022-05-09 Thread Robin Murphy

On 2022-05-09 18:26, Jason Gunthorpe wrote:

On Mon, May 09, 2022 at 10:59:11AM +0100, Robin Murphy wrote:


IOMMU_DOMAIN_DMA_FQ now effectively takes over the original
semantics of IOMMU_DOMAIN_DMA as the one that depends on
driver-specific functionality.


If I grasp the FQ stuff right, it seems that this only requires the
driver to implement ops->flush_iotlb_all()? I don't see anything
obvious in any driver that is specifically _FQ related?

If yes, it makes me wonder why I see drivers implementing
ops->flush_iotlb_all() but not supporting the _FQ domain during alloc?

Further, if yes, wouldn't it make sense to just trigger FQ based on
domain->op->flush_iotlb_all being set?


The main thing drivers need to do for flush queue support is to actually 
implement the optimisations which make it worthwhile. It's up to 
individual drivers how much use they want to make of the 
iommu_iotlb_gather mechanism, and they're free to issue invalidations or 
even enforce their completion directly within their unmap callback if 
they so wish. In such cases, enabling a flush queue would do nothing but 
hurt performance and waste memory.
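
As a concrete shape, a driver's unmap path that batches invalidations
for a later sync might look roughly like this (a sketch; the my_*
helpers are assumed, not a real driver):

	static size_t my_unmap_pages(struct iommu_domain *domain,
				     unsigned long iova, size_t pgsize,
				     size_t pgcount,
				     struct iommu_iotlb_gather *gather)
	{
		size_t unmapped = my_pgtable_unmap(domain, iova,
						   pgsize * pgcount);

		/*
		 * Accumulate the range; the core invokes ->iotlb_sync()
		 * later - or the flush-queue timer does, under DMA_FQ.
		 */
		iommu_iotlb_gather_add_range(gather, iova, unmapped);
		return unmapped;
	}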



It seems like there is some confusion here, because I see the sysfs
default domain store path just does this:

/* We can bring up a flush queue without tearing down the domain */
if (type == IOMMU_DOMAIN_DMA_FQ && prev_dom->type == IOMMU_DOMAIN_DMA) {
ret = iommu_dma_init_fq(prev_dom);
if (!ret)
prev_dom->type = IOMMU_DOMAIN_DMA_FQ;
goto out;
}

Which will allow a driver that rejected creating DMA_FQ during alloc
to end up with DMA_FQ anyhow???


Yes, I can't remember if I ever mentioned it anywhere, but that is not 
an oversight. The sysfs interface is a bit of a specialist sport, and if 
a user really wants to go out of their way to bring up a flush queue 
which won't help performance, they can, and they can observe the 
zero-to-negative performance gain, and maybe learn not to bother again 
on that system. It's just not worth the additional complexity to try to 
save users from themselves.



FWIW, mtk-iommu doesn't really have any need to reject
IOMMU_DOMAIN_UNMANAGED, they just don't have any relevant client drivers
that want it;


Ok..


however arm-smmu definitely does need to continue rejecting
IOMMU_DOMAIN_DMA when it can't rely on the DT code ensuring the
correct probe ordering with the old DT binding (otherwise client
drivers are liable to get broken DMA ops).


I saw this code and wondered what it does?

smmu alloc_domain returns NULL, which if I read everything right
causes NULL to become the default_domain.

But then what happens? This driver has no detach_dev so after, say
VFIO does stuff, how do we get back into whatever boot-time state NULL
represents?


Shhh... the main point of the arm-smmu legacy binding support is to do 
whatever the handful of people still using ancient AMD Seattle boards 
with original firmware need to do. Clearly they haven't been reassigning 
devices straight back from VFIO to a kernel driver for the last 
6-and-a-bit years since that's been broken, so I'm now discounting it as 
a supported legacy-binding-use-case. Don't give them ideas! ;)



Is it OK because dev_iommu_priv_get() in arm_smmu_attach_dev() will
always fail if legacy? If that is the case then why allow allocating
any domain at all?

It feels like this really wants a 'IOMMU_DOMAIN_PLATFORM_DMA_OPS' set
as the default_domain meaning that when that domain is assigned, the
platform's DMA ops are handling the iommu? If I get it right..


Nah, we just need to actually finish introducing default domains. I'm OK 
to tackle the remaining SoC IOMMU drivers once my bus ops work meanders 
into the arch/arm iommu-dma conversion revival; it just needs people who 
understand s390 and fsl-pamu well enough to at least (presumably) bodge 
up enough IOMMU_DOMAIN_IDENTITY support to replicate their current 
"detached" behaviours and force CONFIG_IOMMU_DEFAULT_PASSTHROUGH on 
those architectures.


Cheers,
Robin.


Re: fully convert arm to use dma-direct

2022-05-09 Thread Linus Walleij
On Mon, May 9, 2022 at 5:44 PM Russell King (Oracle)
 wrote:

> Assabet is what needs testing for that, or one of the SA1110 machines.
> I'm away from home on my boat (and have been for the last two and a bit
> weeks) so can't test. Sorry.

Hm I actually have an Assabet, but I never even powered it up. I'm
on parental leave for another week but after that I could actually
try to get that machine up, but it'd be a bit late for this merge window
indeed.

BR
Linus Walleij


Re: [PATCH v2] iommu/dma: Explicitly sort PCI DMA windows

2022-05-09 Thread Rob Herring
On Mon, May 9, 2022 at 5:16 AM Robin Murphy  wrote:
>
> Originally, creating the dma_ranges resource list in pre-sorted fashion
> was the simplest and most efficient way to enforce the order required by
> iova_reserve_pci_windows(). However since then at least one PCI host
> driver is now re-sorting the list for its own probe-time processing,
> which doesn't seem entirely unreasonable, so that basic assumption no
> longer holds. Make iommu-dma robust and get the sort order it needs by
> explicitly sorting, which means we can also save the effort at creation
> time and just build the list in whatever natural order the DT had.
>
> Signed-off-by: Robin Murphy 
> ---
>
> v2: Clean up now-unused local variable
>
>  drivers/iommu/dma-iommu.c | 13 -
>  drivers/pci/of.c  |  8 +---
>  2 files changed, 13 insertions(+), 8 deletions(-)

Reviewed-by: Rob Herring 
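
[The mechanism in question, roughly - a sketch of sorting a dma_ranges
resource list with list_sort(); the comparison shown is illustrative:

	static int dma_ranges_cmp(void *priv, const struct list_head *a,
				  const struct list_head *b)
	{
		struct resource_entry *ra = list_entry(a, typeof(*ra), node);
		struct resource_entry *rb = list_entry(b, typeof(*rb), node);

		/* sort windows by ascending base address */
		return ra->res->start > rb->res->start;
	}

	list_sort(NULL, &bridge->dma_ranges, dma_ranges_cmp);
]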


Re: [RESEND PATCH v8 00/11] Fix BUG_ON in vfio_iommu_group_notifier()

2022-05-09 Thread Jason Gunthorpe via iommu
On Wed, May 04, 2022 at 01:57:05PM +0200, Joerg Roedel wrote:
> On Wed, May 04, 2022 at 08:51:35AM -0300, Jason Gunthorpe wrote:
> > Nicolin and Eric have been testing with this series on ARM for a long
> > time now, it is not like it is completely broken.
> 
> Yeah, I am also optimistic this can be fixed soon. But the rule is that
> the next branch should only contain patches which I would send to Linus.
> And with a known issue in it I wouldn't, so it is excluded at least from
> my next branch for now. The topic branch is still alive and I will merge
> it again when the fix is in.

The fix is out, let's merge it back in so we can have some more time to
discover any additional issues. People seem to test when it is in your
branch.

Thanks,
Jason


Re: [PATCH v2] iommu: iommu_group_claim_dma_owner() must always assign a domain

2022-05-09 Thread Jason Gunthorpe via iommu
On Mon, May 09, 2022 at 10:59:11AM +0100, Robin Murphy wrote:

> IOMMU_DOMAIN_DMA_FQ now effectively takes over the original
> semantics of IOMMU_DOMAIN_DMA as the one that depends on
> driver-specific functionality.

If I grasp the FQ stuff right, it seems that this only requires the
driver to implement ops->flush_iotlb_all()? I don't see anything
obvious in any driver that is specifically _FQ related?

If yes, it makes me wonder why I see drivers implementing
ops->flush_iotlb_all() but not supporting the _FQ domain during alloc?

Further, if yes, wouldn't it make sense to just trigger FQ based on
domain->op->flush_iotlb_all being set?

It seems like there is some confusion here, because I see the sysfs
default domain store path just does this:

/* We can bring up a flush queue without tearing down the domain */
if (type == IOMMU_DOMAIN_DMA_FQ && prev_dom->type == IOMMU_DOMAIN_DMA) {
ret = iommu_dma_init_fq(prev_dom);
if (!ret)
prev_dom->type = IOMMU_DOMAIN_DMA_FQ;
goto out;
}

Which will allow a driver that rejected creating DMA_FQ during alloc
to end up with DMA_FQ anyhow???

> FWIW, mtk-iommu doesn't really have any need to reject
> IOMMU_DOMAIN_UNMANAGED, they just don't have any relevant client drivers
> that want it;

Ok..

> however arm-smmu definitely does need to continue rejecting
> IOMMU_DOMAIN_DMA when it can't rely on the DT code ensuring the
> correct probe ordering with the old DT binding (otherwise client
> drivers are liable to get broken DMA ops).

I saw this code and wondered what it does?

smmu alloc_domain returns NULL, which if I read everything right
causes NULL to become the default_domain.

But then what happens? This driver has no detach_dev so after, say
VFIO does stuff, how do we get back into whatever boot-time state NULL
represents?

Is it OK because dev_iommu_priv_get() in arm_smmu_attach_dev() will
always fail if legacy? If that is the case then why allow allocating
any domain at all?

It feels like this really wants a 'IOMMU_DOMAIN_PLATFORM_DMA_OPS' set
as the default_domain meaning that when that domain is assigned, the
platform's DMA ops are handling the iommu? If I get it right..

Anyhow, thanks, this sort of helps confirm my feeling that the domain
types are a little crufty.

Thanks,
Jason


[PATCH v3] iommu: iommu_group_claim_dma_owner() must always assign a domain

2022-05-09 Thread Jason Gunthorpe via iommu
Once the group enters 'owned' mode it can never be assigned back to the
default_domain or to a NULL domain. It must always be actively assigned to
a current domain. If the caller hasn't provided a domain then the core
must provide an explicit DMA blocking domain that has no DMA map.

Lazily create a group-global blocking DMA domain when
iommu_group_claim_dma_owner is first called and immediately assign the
group to it. This ensures that DMA is immediately fully isolated on all
IOMMU drivers.

If the user attaches/detaches while owned then detach will set the group
back to the blocking domain.

Slightly reorganize the call chains so that
__iommu_group_set_core_domain() is the function that removes any caller
configured domain and sets the domains back a core owned domain with an
appropriate lifetime.

__iommu_group_set_domain() is the worker function that can change the
domain assigned to a group to any target domain, including NULL.

Add comments clarifying how the NULL vs detach_dev vs default_domain works
based on Robin's remarks.

This fixes an oops with VFIO and SMMUv3 because VFIO will call
iommu_detach_group() and then immediately iommu_domain_free(), but
SMMUv3 has no way to know that the domain it is holding a pointer to
has been freed. Now the iommu_detach_group() will assign the blocking
domain and SMMUv3 will no longer hold a stale domain reference.

Fixes: 1ea2a07a532b ("iommu: Add DMA ownership management interfaces")
Reported-by: Qian Cai 
Tested-by: Baolu Lu 
Tested-by: Nicolin Chen 
Co-developed-by: Robin Murphy 
Signed-off-by: Robin Murphy 
Signed-off-by: Jason Gunthorpe 
--

Just minor polishing as discussed

v3:
 - Change names to __iommu_group_set_domain() /
   __iommu_group_set_core_domain()
 - Clarify comments
 - Call __iommu_group_set_domain() directly in
   iommu_group_release_dma_owner() since we know it is always selecting
   the default_domain
v2: https://lore.kernel.org/r/0-v2-f62259511ac0+6-iommu_dma_block_...@nvidia.com
 - Remove redundant detach_dev ops check in __iommu_detach_device and
   make the added WARN_ON fail instead
 - Check for blocking_domain in __iommu_attach_group() so VFIO can
   actually attach a new group
 - Update comments and spelling
 - Fix missed change to new_domain in iommu_group_do_detach_device()
v1: 
https://lore.kernel.org/r/0-v1-6e9d2d0a759d+11b-iommu_dma_block_...@nvidia.com

---
 drivers/iommu/iommu.c | 127 ++
 1 file changed, 91 insertions(+), 36 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 0c42ece2585406..0b22e51e90f416 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -44,6 +44,7 @@ struct iommu_group {
char *name;
int id;
struct iommu_domain *default_domain;
+   struct iommu_domain *blocking_domain;
struct iommu_domain *domain;
struct list_head entry;
unsigned int owner_cnt;
@@ -82,8 +83,8 @@ static int __iommu_attach_device(struct iommu_domain *domain,
 struct device *dev);
 static int __iommu_attach_group(struct iommu_domain *domain,
struct iommu_group *group);
-static void __iommu_detach_group(struct iommu_domain *domain,
-struct iommu_group *group);
+static int __iommu_group_set_domain(struct iommu_group *group,
+   struct iommu_domain *new_domain);
 static int iommu_create_device_direct_mappings(struct iommu_group *group,
   struct device *dev);
 static struct iommu_group *iommu_group_get_for_dev(struct device *dev);
@@ -596,6 +597,8 @@ static void iommu_group_release(struct kobject *kobj)
 
if (group->default_domain)
iommu_domain_free(group->default_domain);
+   if (group->blocking_domain)
+   iommu_domain_free(group->blocking_domain);
 
kfree(group->name);
kfree(group);
@@ -1907,6 +1910,24 @@ void iommu_domain_free(struct iommu_domain *domain)
 }
 EXPORT_SYMBOL_GPL(iommu_domain_free);
 
+/*
+ * Put the group's domain back to the appropriate core-owned domain - either
+ * the standard kernel-mode DMA configuration or an all-DMA-blocked domain.
+ */
+static void __iommu_group_set_core_domain(struct iommu_group *group)
+{
+   struct iommu_domain *new_domain;
+   int ret;
+
+   if (group->owner)
+   new_domain = group->blocking_domain;
+   else
+   new_domain = group->default_domain;
+
+   ret = __iommu_group_set_domain(group, new_domain);
+   WARN(ret, "iommu driver failed to attach the default/blocking domain");
+}
+
 static int __iommu_attach_device(struct iommu_domain *domain,
 struct device *dev)
 {
@@ -1963,9 +1984,6 @@ static void __iommu_detach_device(struct iommu_domain *domain,
if (iommu_is_attach_deferred(dev))
return;
 
-   if (unlikely(domain->ops->detach_
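
For reference, a sketch of the lazy blocking-domain allocation the
commit message describes (helper and parameter names are assumed, not
quoted from the patch):

	/* Sketch: allocate the group-wide blocking domain on the first
	 * ownership claim, preferring IOMMU_DOMAIN_BLOCKED and falling
	 * back to an empty paging domain, which also maps nothing. */
	static int __iommu_group_alloc_blocking_domain(struct iommu_group *group,
						       struct device *dev)
	{
		if (group->blocking_domain)
			return 0;

		group->blocking_domain =
			__iommu_domain_alloc(dev->bus, IOMMU_DOMAIN_BLOCKED);
		if (!group->blocking_domain)
			group->blocking_domain =
				__iommu_domain_alloc(dev->bus, IOMMU_DOMAIN_UNMANAGED);
		return group->blocking_domain ? 0 : -EINVAL;
	}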

Re: fully convert arm to use dma-direct

2022-05-09 Thread Russell King (Oracle)
On Fri, Apr 22, 2022 at 11:17:20PM +0200, Linus Walleij wrote:
> On Thu, Apr 21, 2022 at 9:42 AM Christoph Hellwig  wrote:
> 
> > arm is the last platform not using the dma-direct code for directly
> > mapped DMA.  With the dmabounce removal from Arnd we can easily switch
> > arm to always use dma-direct now (it already does for LPAE configs
> > and nommu).  I'd love to merge this series through the dma-mapping tree
> > as it gives us the opportunity for additional core dma-mapping
> > improvements.
> (...)
> 
> >  b/arch/arm/mach-footbridge/Kconfig   |1
> >  b/arch/arm/mach-footbridge/common.c  |   19
> >  b/arch/arm/mach-footbridge/include/mach/dma-direct.h |8
> >  b/arch/arm/mach-footbridge/include/mach/memory.h |4
> 
> I think Marc Z has a Netwinder that he can test this on. Marc?
> I have one too, just not much in my office because of parental leave.

Netwinder is just more of a PC-but-with-ARM - it doesn't make use of any
of the dmabounce code.

Assabet is what needs testing for that, or one of the SA1110 machines.
I'm away from home on my boat (and have been for the last two and a bit
weeks) so can't test. Sorry.

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!


Re: [PATCH v6 22/29] x86/watchdog/hardlockup: Add an HPET-based hardlockup detector

2022-05-09 Thread Thomas Gleixner
On Thu, May 05 2022 at 17:00, Ricardo Neri wrote:
> + if (is_hpet_hld_interrupt(hdata)) {
> + /*
> +  * Kick the timer first. If the HPET channel is periodic, it
> +  * helps to reduce the delta between the expected TSC value and
> +  * its actual value the next time the HPET channel fires.
> +  */
> + kick_timer(hdata, !(hdata->has_periodic));
> +
> + if (cpumask_weight(hld_data->monitored_cpumask) > 1) {
> + /*
> +  * Since we cannot know the source of an NMI, the best
> +  * we can do is to use a flag to indicate to all online
> +  * CPUs that they will get an NMI and that the source of
> +  * that NMI is the hardlockup detector. Offline CPUs
> +  * also receive the NMI but they ignore it.
> +  *
> +  * Even though we are in NMI context, we have concluded
> +  * that the NMI came from the HPET channel assigned to
> +  * the detector, an event that is infrequent and only
> +  * occurs in the handling CPU. There should not be races
> +  * with other NMIs.
> +  */
> + cpumask_copy(hld_data->inspect_cpumask,
> +  cpu_online_mask);
> +
> + /* If we are here, IPI shorthands are enabled. */
> + apic->send_IPI_allbutself(NMI_VECTOR);

So if the monitored cpumask is a subset of online CPUs, which is the
case when isolation features are enabled, then you still send NMIs to
those isolated CPUs. I'm sure the isolation folks will be enthused.
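
One way to avoid that (a sketch only; the cpumask fields are from the
quoted patch, the rest is illustrative) is to target just the monitored
CPUs instead of the shorthand:

	/* Illustrative: NMI only the CPUs the detector monitors, so
	 * isolated CPUs are left alone - at the cost of giving up the
	 * IPI shorthand fast path. */
	cpumask_copy(hld_data->inspect_cpumask, hld_data->monitored_cpumask);
	cpumask_clear_cpu(raw_smp_processor_id(), hld_data->inspect_cpumask);
	apic->send_IPI_mask(hld_data->inspect_cpumask, NMI_VECTOR);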

Thanks,

tglx


Re: [PATCH RFC 11/12] iommufd: vfio container FD ioctl compatibility

2022-05-09 Thread Jason Gunthorpe via iommu
On Mon, May 09, 2022 at 04:01:52PM +1000, David Gibson wrote:

> > The default iommu_domain that the iommu driver creates will be used
> > here, it is up to the iommu driver to choose something reasonable for
> > use by applications like DPDK. ie PPC should probably pick its biggest
> > x86-like aperture.
> 
> So, using the big aperture means a very high base IOVA
> (1<<59)... which means that it won't work at all if you want to attach
> any devices that aren't capable of 64-bit DMA.

I'd expect to include the 32 bit window too..

> Using the maximum possible window size would mean we either
> potentially waste a lot of kernel memory on pagetables, or we use
> unnecessarily large number of levels to the pagetable.

All drivers have this issue to one degree or another. We seem to be
ignoring it - in any case this is a micro optimization, not a
functional need?

> More generally, the problem with the interface advertising limitations
> and it being up to userspace to work out if those are ok or not is
> that it's fragile.  It's pretty plausible that some future IOMMU model
> will have some new kind of limitation that can't be expressed in the
> query structure we invented now.

The basic API is very simple - the driver needs to provide ranges of
IOVA and map/unmap - I don't think we have a future problem here that
we need to try to guess at and solve today.

Even PPC fits this just fine, the open question for DPDK is more
around optimization, not functional.

> But if userspace requests the capabilities it wants, and the kernel
> acks or nacks that, we can support the new host IOMMU with existing
> software just fine.

No, this just makes it fragile in the other direction because now
userspace has to know what platform specific things to ask for *or it
doesn't work at all*. This is not an improvement for the DPDK cases.

Kernel decides, using all the kernel knowledge it has and tells the
application what it can do - this is the basic simplified interface.

> > The iommu-driver-specific struct is the "advanced" interface and
> > allows a user-space IOMMU driver to tightly control the HW with full
> > HW specific knowledge. This is where all the weird stuff that is not
> > general should go.
> 
> Right, but forcing anything more complicated than "give me some IOVA
> region" to go through the advanced interface means that qemu (or any
> hypervisor where the guest platform need not identically match the
> host) has to have n^2 complexity to match each guest IOMMU model to
> each host IOMMU model.

I wouldn't say n^2, but yes, qemu needs to have a userspace driver for
the platform IOMMU, and yes it needs this to reach optimal
behavior. We already know this is a hard requirement for using nesting
as acceleration, I don't see why apertures are so different.

> Errr.. how do you figure?  On ppc the ranges and pagesizes are
> definitely negotiable.  I'm not really familiar with other models, but
> anything which allows *any* variations in the pagetable structure will
> effectively have at least some negotiable properties.

As above, if you ask for the wrong thing then you don't get
anything. If DPDK asks for something that works on ARM like 0 -> 4G
then PPC and x86 will always fail. How is this improving anything to
require applications to carefully ask for exactly the right platform
specific ranges?

It isn't like there is some hard coded value we can put into DPDK that
will work on every platform. So kernel must pick for DPDK, IMHO. I
don't see any feasible alternative.

> Which is why I'm suggesting that the base address be an optional
> request.  DPDK *will* care about the size of the range, so it just
> requests that and gets told a base address.

We've talked about a size of IOVA address space before, strictly as a
hint, to possibly optimize page table layout, or something, and I'm
fine with that idea. But - we have no driver implementation today, so
I'm not sure what we can really do with this right now..

Kevin could Intel consume a hint on IOVA space and optimize the number
of IO page table levels?

> > and IMHO, qemu
> > is fine to have a PPC specific userspace driver to tweak this PPC
> > unique thing if the default windows are not acceptable.
> 
> Not really, because it's the ppc *target* (guest) side which requires
> the specific properties, but selecting the "advanced" interface
> requires special knowledge on the *host* side.

The ppc specific driver would be on the generic side of qemu in its
viommu support framework. There is lots of host driver optimization
possible here with knowledge of the underlying host iommu HW. It
should not be connected to the qemu target.

It is not so different from today where qemu has to know about ppc's
special vfio interface generically even to emulate x86.

> > IMHO it is no different from imagining an Intel specific userspace
> > driver that is using userspace IO pagetables to optimize
> > cross-platform qemu vIOMMU emulation.
> 
> I'm not quite sure what you have in mind he

Re: [PATCH v6 21/29] x86/nmi: Add an NMI_WATCHDOG NMI handler category

2022-05-09 Thread Thomas Gleixner
On Thu, May 05 2022 at 17:00, Ricardo Neri wrote:
> Add NMI_WATCHDOG as a new category of NMI handler. This new category
> is to be used with the HPET-based hardlockup detector. This detector
> does not have a direct way of checking if the HPET timer is the source of
> the NMI. Instead, it indirectly estimates it using the time-stamp counter.
>
> Therefore, we may have false positives in case another NMI occurs within
> the estimated time window. For this reason, we want the handler of the
> detector to be called after all the NMI_LOCAL handlers. A simple way
> of achieving this is with a new NMI handler category.
>
> @@ -379,6 +385,10 @@ static noinstr void default_do_nmi(struct pt_regs *regs)
>   }
>   raw_spin_unlock(&nmi_reason_lock);
>  
> + handled = nmi_handle(NMI_WATCHDOG, regs);
> + if (handled == NMI_HANDLED)
> + goto out;
> +

How is this supposed to work reliably?

If perf is active and the HPET NMI and the perf NMI come in around the
same time, then nmi_handle(LOCAL) can swallow the NMI and the watchdog
won't be checked. Because MSI is strictly edge and the message is only
sent once, this can result in a stale watchdog, no?

Thanks,

tglx





Re: [RFC PATCH V2 0/2] swiotlb: Add child io tlb mem support

2022-05-09 Thread Tianyu Lan

On 5/2/2022 8:54 PM, Tianyu Lan wrote:

From: Tianyu Lan 

Traditionally swiotlb was not performance critical because it was only
used for slow devices. But in some setups, like TDX/SEV confidential
guests, all IO has to go through swiotlb. Currently swiotlb only has a
single lock. Under high IO load with multiple CPUs this can lead to
significant lock contention on the swiotlb lock.

This patch set adds child IO TLB mem support to resolve spinlock
overhead among a device's queues. Each device may allocate its own IO
TLB mem and set up child IO TLB mems according to its queue number.
Setting the number of child IO TLB mems equal to the device's queue
count helps resolve swiotlb spinlock contention across devices and
queues.

Patch 2 introduces the IO TLB block concept and the
swiotlb_device_allocate() API to allocate a per-device swiotlb bounce
buffer. The new API accepts a queue number as the number of child IO
TLB mems to set up for the device's IO TLB mem.
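
For illustration, a possible driver-side use under an assumed prototype
(the real signature lives in patch 2; everything below is hypothetical):

	/* Hypothetical usage: one child IO TLB mem per hardware queue,
	 * with a 4 MB per-device bounce buffer. */
	ret = swiotlb_device_allocate(&pdev->dev, nr_queues, 4 * 1024 * 1024);
	if (ret)
		dev_warn(&pdev->dev,
			 "no per-queue bounce buffers, falling back to global swiotlb\n");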


Gentle ping...

Thanks.


Tianyu Lan (2):
   swiotlb: Add Child IO TLB mem support
   Swiotlb: Add device bounce buffer allocation interface

  include/linux/swiotlb.h |  40 ++
  kernel/dma/swiotlb.c| 290 ++--
  2 files changed, 317 insertions(+), 13 deletions(-)




[PATCH v2] iommu/dma: Explicitly sort PCI DMA windows

2022-05-09 Thread Robin Murphy
Originally, creating the dma_ranges resource list in pre-sorted fashion
was the simplest and most efficient way to enforce the order required by
iova_reserve_pci_windows(). However, at least one PCI host driver has
since started re-sorting the list for its own probe-time processing,
which doesn't seem entirely unreasonable, so that basic assumption no
longer holds. Make iommu-dma robust and get the sort order it needs by
explicitly sorting, which means we can also save the effort at creation
time and just build the list in whatever natural order the DT had.

Signed-off-by: Robin Murphy 
---

v2: Clean up now-unused local variable

 drivers/iommu/dma-iommu.c | 13 -
 drivers/pci/of.c  |  8 +---
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 09f6e1c0f9c0..d05538af4fe9 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -414,6 +415,15 @@ static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie,
return 0;
 }
 
+static int iommu_dma_ranges_sort(void *priv, const struct list_head *a,
+   const struct list_head *b)
+{
+   struct resource_entry *res_a = list_entry(a, typeof(*res_a), node);
+   struct resource_entry *res_b = list_entry(b, typeof(*res_b), node);
+
+   return res_a->res->start > res_b->res->start;
+}
+
 static int iova_reserve_pci_windows(struct pci_dev *dev,
struct iova_domain *iovad)
 {
@@ -432,6 +442,7 @@ static int iova_reserve_pci_windows(struct pci_dev *dev,
}
 
/* Get reserved DMA windows from host bridge */
+   list_sort(NULL, &bridge->dma_ranges, iommu_dma_ranges_sort);
resource_list_for_each_entry(window, &bridge->dma_ranges) {
end = window->res->start - window->offset;
 resv_iova:
@@ -440,7 +451,7 @@ static int iova_reserve_pci_windows(struct pci_dev *dev,
hi = iova_pfn(iovad, end);
reserve_iova(iovad, lo, hi);
} else if (end < start) {
-   /* dma_ranges list should be sorted */
+   /* DMA ranges should be non-overlapping */
dev_err(&dev->dev,
"Failed to reserve IOVA [%pa-%pa]\n",
&start, &end);
diff --git a/drivers/pci/of.c b/drivers/pci/of.c
index cb2e8351c2cc..8f0ebaf9ae85 100644
--- a/drivers/pci/of.c
+++ b/drivers/pci/of.c
@@ -369,7 +369,6 @@ static int devm_of_pci_get_host_bridge_resources(struct device *dev,
 
dev_dbg(dev, "Parsing dma-ranges property...\n");
for_each_of_pci_range(&parser, &range) {
-   struct resource_entry *entry;
/*
 * If we failed translation or got a zero-sized region
 * then skip this range
@@ -393,12 +392,7 @@ static int devm_of_pci_get_host_bridge_resources(struct device *dev,
goto failed;
}
 
-   /* Keep the resource list sorted */
-   resource_list_for_each_entry(entry, ib_resources)
-   if (entry->res->start > res->start)
-   break;
-
-   pci_add_resource_offset(&entry->node, res,
+   pci_add_resource_offset(ib_resources, res,
res->start - range.pci_addr);
}
 
-- 
2.35.3.dirty
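
A note on the comparator above: list_sort() only acts on the sign of
the returned value (greater than zero means a sorts after b), so the
bool-valued comparison is a valid comparator and, since list_sort() is
stable, yields an ascending order by window start address with ties
left in their original order.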



Re: [PATCH v2] iommu: iommu_group_claim_dma_owner() must always assign a domain

2022-05-09 Thread Robin Murphy

On 2022-05-06 14:54, Jason Gunthorpe wrote:

On Fri, May 06, 2022 at 02:35:50PM +0100, Robin Murphy wrote:


So you want to say "DMA is always managed" when attaching a domain of
type IOMMU_DOMAIN_UNMANAGED? :)


Touché ;) Just goes to confirm the point above that confusion between
general concepts and specific API terms is all too easy. An "unmanaged"
domain from the PoV of the API just means it's managed by the external
caller, but you're right that that's not necessarily so obvious either.


Yeah, I'm not so keen on the naming used for IOMMU_DOMAIN_*

I looked for a bit and could not figure out why we need to have
IOMMU_DOMAIN_DMA either.. I didn't find anything obvious in the iommu
drivers that looked like a special case for this? Most drivers treat
it identically to UNMANAGED in their alloc functions

Only mtk, arm-smmu and some odd stuff in Intel seemed to be sensitive?


Yes, that's a relatively recent change[1] - prior to that, drivers did 
still have to take (minimal) action to opt into iommu-dma support. 
IOMMU_DOMAIN_DMA still needs to exist as a type for the core code to 
differentiate internally, so driver involvement is mostly now just a 
case of saying "yeah OK fine" if they see it. IOMMU_DOMAIN_DMA_FQ now 
effectively takes over the original semantics of IOMMU_DOMAIN_DMA as the 
one that depends on driver-specific functionality.
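
As a generic illustration of that "yeah OK fine" opt-in (not any
specific driver; foo_alloc_paging_domain() is a made-up helper), a
domain_alloc after [1] only needs something like:

	static struct iommu_domain *foo_domain_alloc(unsigned int type)
	{
		if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
			return NULL;

		/* The core drives the iommu-dma plumbing for
		 * IOMMU_DOMAIN_DMA itself; the driver just allocates. */
		return foo_alloc_paging_domain();
	}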


FWIW, mtk-iommu doesn't really have any need to reject 
IOMMU_DOMAIN_UNMANAGED, they just don't have any relevant client drivers 
that want it; however arm-smmu definitely does need to continue 
rejecting IOMMU_DOMAIN_DMA when it can't rely on the DT code ensuring 
the correct probe ordering with the old DT binding (otherwise client 
drivers are liable to get broken DMA ops).


Thanks,
Robin.

[1] 
https://lore.kernel.org/linux-iommu/cover.1628682048.git.robin.mur...@arm.com/


Re: [PATCH] iommu/dma: Fix iova map result check bug

2022-05-09 Thread Robin Murphy

On 2022-05-07 09:52, yf.w...@mediatek.com wrote:

From: Yunfei Wang 

The return type of iommu_map_sg_atomic() is ssize_t, but the IOVA
size is a size_t, i.e. one is signed while the other is unsigned.

When the iommu_map_sg_atomic() return value is compared with the IOVA
size, the usual arithmetic conversions force the signed value to
unsigned. So if the IOVA map fails and iommu_map_sg_atomic() returns a
negative error code, (ret < iova_len) evaluates to false: the IOVA is
never freed, and the master still appears to successfully get an IOVA
for the failed mapping, which is not expected.

Therefore, check the return value of iommu_map_sg_atomic() in two
steps: first for a negative error code, then against the expected
length.
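
A minimal standalone illustration of the conversion at issue (userspace
C; nothing here is from the kernel tree):

	#include <stdio.h>
	#include <stddef.h>	/* size_t */
	#include <sys/types.h>	/* ssize_t */

	int main(void)
	{
		ssize_t ret = -22;	/* e.g. -EINVAL from a failed map */
		size_t len = 4096;

		/* The usual arithmetic conversions turn ret into a huge
		 * unsigned value, so the error compares as "big enough". */
		if (ret < len)
			printf("error caught\n");
		else
			printf("error silently ignored\n");	/* this prints */
		return 0;
	}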


Heh, it's always a fun day when I have to go back to the C standard to 
remind myself of the usual arithmetic conversions. But indeed this seems 
correct, and even though the double comparisons look a little 
non-obvious on their own I can't think of an objectively better 
alternative, so:


Reviewed-by: Robin Murphy 


Fixes: ad8f36e4b6b1 ("iommu: return full error code from iommu_map_sg[_atomic]()")
Signed-off-by: Yunfei Wang 
Cc:  # 5.15.*
---
  drivers/iommu/dma-iommu.c | 7 ---
  1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 09f6e1c0f9c0..2932281e93fc 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -776,6 +776,7 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev,
unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap;
struct page **pages;
dma_addr_t iova;
+   ssize_t ret;
  
 	if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
 	    iommu_deferred_attach(dev, domain))
@@ -813,8 +814,8 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev,
arch_dma_prep_coherent(sg_page(sg), sg->length);
}
  
-	if (iommu_map_sg_atomic(domain, iova, sgt->sgl, sgt->orig_nents, ioprot)
-			< size)
+	ret = iommu_map_sg_atomic(domain, iova, sgt->sgl, sgt->orig_nents, ioprot);
+   if (ret < 0 || ret < size)
goto out_free_sg;
  
  	sgt->sgl->dma_address = iova;

@@ -1209,7 +1210,7 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
 * implementation - it knows better than we do.
 */
ret = iommu_map_sg_atomic(domain, iova, sg, nents, prot);
-   if (ret < iova_len)
+   if (ret < 0 || ret < iova_len)
goto out_free_iova;
  
  	return __finalise_sg(dev, sg, nents, iova);



[PATCH v2] iommu/amd: Set translation valid bit only when IO page tables are in use

2022-05-09 Thread Suravee Suthikulpanit via iommu
On AMD systems with SNP enabled, the IOMMU hardware checks the host
translation valid (TV) and guest translation valid (GV) bits in the
device table entry (DTE) before accessing the corresponding page tables.

However, the current IOMMU driver sets the TV bit for all devices
regardless of whether the host page table is in use. This results in an
ILLEGAL_DEV_TABLE_ENTRY event for devices which do not have the host
page table root pointer set up.

Therefore, only set the TV bit when the host page table is in use, i.e.
when the domain ID in the AMD IOMMU device table entry (DTE) is
non-zero.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/init.c  | 4 +---
 drivers/iommu/amd/iommu.c | 8 ++--
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 648d6b94ba8c..6a2dadf2b2dc 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -2336,10 +2336,8 @@ static void init_device_table_dma(void)
 {
u32 devid;
 
-   for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
+   for (devid = 0; devid <= amd_iommu_last_bdf; ++devid)
set_dev_entry_bit(devid, DEV_ENTRY_VALID);
-   set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION);
-   }
 }
 
 static void __init uninit_device_table_dma(void)
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index a1ada7bff44e..cea254968f06 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1473,7 +1473,7 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain,
 
pte_root |= (domain->iop.mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
-   pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV;
+   pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V;
 
flags = amd_iommu_dev_table[devid].data[1];
 
@@ -1513,6 +1513,10 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain,
flags |= tmp;
}
 
+   /* Only set the TV bit when IOMMU page translation is in use */
+   if (domain->id != 0)
+   pte_root |= DTE_FLAG_TV;
+
flags &= ~DEV_DOMID_MASK;
flags |= domain->id;
 
@@ -1535,7 +1539,7 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain,
 static void clear_dte_entry(u16 devid)
 {
/* remove entry from the device table seen by the hardware */
-   amd_iommu_dev_table[devid].data[0]  = DTE_FLAG_V | DTE_FLAG_TV;
+   amd_iommu_dev_table[devid].data[0]  = DTE_FLAG_V;
amd_iommu_dev_table[devid].data[1] &= DTE_FLAG_MASK;
 
amd_iommu_apply_erratum_63(devid);
-- 
2.25.1
