Re: [PATCH 02/13] iommu/vt-d: Use dev_iommu_priv_get/set()

2020-06-25 Thread Lu Baolu

Hi Joerg,

On 2020/6/25 21:08, Joerg Roedel wrote:

From: Joerg Roedel 

Remove the use of dev->archdata.iommu and use the private per-device
pointer provided by IOMMU core code instead.
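
For readers coming to this later, the conversion is simply a move from the
arch-specific field to the generic accessors in <linux/iommu.h>. A minimal
sketch of the pattern (hypothetical my_dev_info structure and functions, not
part of the patch):

#include <linux/iommu.h>

/* Hypothetical per-device data an IOMMU driver wants to hang off a device. */
struct my_dev_info {
	int id;
};

static void example_set(struct device *dev, struct my_dev_info *info)
{
	/* Previously: dev->archdata.iommu = info; (arch-specific field) */
	dev_iommu_priv_set(dev, info);
}

static struct my_dev_info *example_get(struct device *dev)
{
	/* Previously: return dev->archdata.iommu; */
	return dev_iommu_priv_get(dev);
}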

Signed-off-by: Joerg Roedel 
---
  .../gpu/drm/i915/selftests/mock_gem_device.c   | 10 --
  drivers/iommu/intel/iommu.c| 18 +-


For changes in VT-d driver,

Reviewed-by: Lu Baolu 

Best regards,
baolu


  2 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 9b105b811f1f..e08601905a64 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -24,6 +24,7 @@
  
  #include 

  #include 
+#include 
  
  #include 
  
@@ -118,6 +119,9 @@ struct drm_i915_private *mock_gem_device(void)

  {
struct drm_i915_private *i915;
struct pci_dev *pdev;
+#if IS_ENABLED(CONFIG_IOMMU_API) && defined(CONFIG_INTEL_IOMMU)
+   struct dev_iommu iommu;
+#endif
int err;
  
  	pdev = kzalloc(sizeof(*pdev), GFP_KERNEL);

@@ -136,8 +140,10 @@ struct drm_i915_private *mock_gem_device(void)
dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
  
  #if IS_ENABLED(CONFIG_IOMMU_API) && defined(CONFIG_INTEL_IOMMU)

-   /* hack to disable iommu for the fake device; force identity mapping */
-   pdev->dev.archdata.iommu = (void *)-1;
+   /* HACK HACK HACK to disable iommu for the fake device; force identity mapping */
+   memset(&iommu, 0, sizeof(iommu));
+   iommu.priv = (void *)-1;
+   pdev->dev.iommu = &iommu;
  #endif
  
  	pci_set_drvdata(pdev, i915);

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index d759e7234e98..2ce490c2eab8 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -372,7 +372,7 @@ struct device_domain_info *get_domain_info(struct device *dev)
if (!dev)
return NULL;
  
-	info = dev->archdata.iommu;

+   info = dev_iommu_priv_get(dev);
if (unlikely(info == DUMMY_DEVICE_DOMAIN_INFO ||
 info == DEFER_DEVICE_DOMAIN_INFO))
return NULL;
@@ -743,12 +743,12 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
  
  static int iommu_dummy(struct device *dev)

  {
-   return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
+   return dev_iommu_priv_get(dev) == DUMMY_DEVICE_DOMAIN_INFO;
  }
  
  static bool attach_deferred(struct device *dev)

  {
-   return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO;
+   return dev_iommu_priv_get(dev) == DEFER_DEVICE_DOMAIN_INFO;
  }
  
  /**

@@ -2420,7 +2420,7 @@ static inline void unlink_domain_info(struct device_domain_info *info)
list_del(&info->link);
list_del(&info->global);
if (info->dev)
-   info->dev->archdata.iommu = NULL;
+   dev_iommu_priv_set(info->dev, NULL);
  }
  
  static void domain_remove_dev_info(struct dmar_domain *domain)

@@ -2453,7 +2453,7 @@ static void do_deferred_attach(struct device *dev)
  {
struct iommu_domain *domain;
  
-	dev->archdata.iommu = NULL;

+   dev_iommu_priv_set(dev, NULL);
domain = iommu_get_domain_for_dev(dev);
if (domain)
intel_iommu_attach_device(domain, dev);
@@ -2599,7 +2599,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
list_add(&info->link, &domain->devices);
list_add(&info->global, &device_domain_list);
if (dev)
-   dev->archdata.iommu = info;
+   dev_iommu_priv_set(dev, info);
spin_unlock_irqrestore(&device_domain_lock, flags);
  
  	/* PASID table is mandatory for a PCI device in scalable mode. */

@@ -4004,7 +4004,7 @@ static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
-   pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
+   dev_iommu_priv_set(&pdev->dev, DUMMY_DEVICE_DOMAIN_INFO);
}
  }
DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
@@ -4043,7 +4043,7 @@ static void __init init_no_remapping_devices(void)
drhd->ignored = 1;
for_each_active_dev_scope(drhd->devices,
  drhd->devices_cnt, i, dev)
-   dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
+   dev_iommu_priv_set(dev, DUMMY_DEVICE_DOMAIN_INFO);
}
}
  }
@@ -5665,7 +5665,7 @@ static stru

Re: [PATCH v2 11/12] x86/mmu: Allocate/free PASID

2020-06-14 Thread Lu Baolu

Hi Fenghua,

On 6/13/20 8:41 AM, Fenghua Yu wrote:

A PASID is allocated for an "mm" the first time any thread attaches
to an SVM capable device. Later device attachments (whether to the same
device or another SVM device) will re-use the same PASID.

The PASID is freed when the process exits (so no need to keep
reference counts on how many SVM devices are sharing the PASID).
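
The hunks quoted below stop before the body of __free_pasid() itself; as a
rough sketch of the exit path (for orientation only, not the literal patch
code), the teardown amounts to:

/* Called via destroy_context() -> free_pasid() when the mm is torn down. */
void __free_pasid(struct mm_struct *mm)
{
	unsigned int pasid = mm->pasid;

	/* No SVM device was ever bound, so no PASID was allocated. */
	if (!pasid)
		return;

	/*
	 * The process is exiting, so nothing can still submit work with
	 * this PASID; hand it back to the allocator.
	 */
	ioasid_free(pasid);
}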

Signed-off-by: Fenghua Yu 
Reviewed-by: Tony Luck 
---
v2:
- Define a helper free_bind() to simplify error exit code in bind_mm()
   (Thomas)
- Fix a ret error code in bind_mm() (Thomas)
- Change pasid's type from "int" to "unsigned int" to have consistent
   pasid type in iommu (Thomas)
- Simplify alloc_pasid() a bit.

  arch/x86/include/asm/iommu.h   |   2 +
  arch/x86/include/asm/mmu_context.h |  14 
  drivers/iommu/intel/svm.c  | 101 +
  3 files changed, 105 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index bf1ed2ddc74b..ed41259fe7ac 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -26,4 +26,6 @@ arch_rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
return -EINVAL;
  }
  
+void __free_pasid(struct mm_struct *mm);

+
  #endif /* _ASM_X86_IOMMU_H */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 47562147e70b..f8c91ce8c451 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -13,6 +13,7 @@
  #include 
  #include 
  #include 
+#include 
  
  extern atomic64_t last_mm_ctx_id;
  
@@ -117,9 +118,22 @@ static inline int init_new_context(struct task_struct *tsk,

init_new_context_ldt(mm);
return 0;
  }
+
+static inline void free_pasid(struct mm_struct *mm)
+{
+   if (!IS_ENABLED(CONFIG_INTEL_IOMMU_SVM))
+   return;
+
+   if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
+   return;
+
+   __free_pasid(mm);
+}
+
  static inline void destroy_context(struct mm_struct *mm)
  {
destroy_context_ldt(mm);
+   free_pasid(mm);
  }
  
  extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,

diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 4e775e12ae52..27dc866b8461 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -425,6 +425,53 @@ int intel_svm_unbind_gpasid(struct device *dev, unsigned int pasid)
return ret;
  }
  
+static void free_bind(struct intel_svm *svm, struct intel_svm_dev *sdev,

+ bool new_pasid)
+{
+   if (new_pasid)
+   ioasid_free(svm->pasid);
+   kfree(svm);
+   kfree(sdev);
+}
+
+/*
+ * If this mm already has a PASID, use it. Otherwise allocate a new one.
+ * Let the caller know if a new PASID is allocated via 'new_pasid'.
+ */
+static int alloc_pasid(struct intel_svm *svm, struct mm_struct *mm,
+  unsigned int pasid_max, bool *new_pasid,
+  unsigned int flags)
+{
+   unsigned int pasid;
+
+   *new_pasid = false;
+
+   /*
+* Reuse the PASID if the mm already has a PASID and a private
+* PASID is not requested.
+*/
+   if (mm && mm->pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) {
+   /*
+* Once a PASID is allocated for this mm, the PASID
+* stays with the mm until the mm is dropped. Reuse
+* the PASID which has been already allocated for the
+* mm instead of allocating a new one.
+*/
+   ioasid_set_data(mm->pasid, svm);


How about adding some sanity checks here? For example,

void *p = ioasid_find(NULL, mm->pasid, NULL);

if (!p)
ioasid_set_data(mm->pasid, svm);
else if (IS_ERR(p) || p != svm)
return INVALID_IOASID;
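
Folded into the reuse branch of alloc_pasid(), the suggested check could look
roughly like this (a sketch only; returning INVALID_IOASID on a mismatch
follows the error convention the rest of the function already uses):

	if (mm && mm->pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) {
		void *p = ioasid_find(NULL, mm->pasid, NULL);

		/* Nothing bound yet: attach this svm to the existing PASID. */
		if (!p)
			ioasid_set_data(mm->pasid, svm);
		/* The PASID already carries different data: refuse to reuse it. */
		else if (IS_ERR(p) || p != svm)
			return INVALID_IOASID;

		return mm->pasid;
	}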

Best regards,
baolu


Re: [PATCH v2 11/12] x86/mmu: Allocate/free PASID

2020-06-13 Thread Lu Baolu

Hi Fenghua,

On 2020/6/13 8:41, Fenghua Yu wrote:

A PASID is allocated for an "mm" the first time any thread attaches
to an SVM capable device. Later device attachments (whether to the same
device or another SVM device) will re-use the same PASID.

The PASID is freed when the process exits (so no need to keep
reference counts on how many SVM devices are sharing the PASID).


FYI.

Jean-Philippe Brucker has a patch for mm->pasid management in the vendor
agnostic manner.

https://www.spinics.net/lists/iommu/msg44459.html

Best regards,
baolu



Signed-off-by: Fenghua Yu 
Reviewed-by: Tony Luck 
---
v2:
- Define a helper free_bind() to simplify error exit code in bind_mm()
   (Thomas)
- Fix a ret error code in bind_mm() (Thomas)
- Change pasid's type from "int" to "unsigned int" to have consistent
   pasid type in iommu (Thomas)
- Simplify alloc_pasid() a bit.

  arch/x86/include/asm/iommu.h   |   2 +
  arch/x86/include/asm/mmu_context.h |  14 
  drivers/iommu/intel/svm.c  | 101 +
  3 files changed, 105 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index bf1ed2ddc74b..ed41259fe7ac 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -26,4 +26,6 @@ arch_rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
return -EINVAL;
  }
  
+void __free_pasid(struct mm_struct *mm);

+
  #endif /* _ASM_X86_IOMMU_H */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 47562147e70b..f8c91ce8c451 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -13,6 +13,7 @@
  #include 
  #include 
  #include 
+#include 
  
  extern atomic64_t last_mm_ctx_id;
  
@@ -117,9 +118,22 @@ static inline int init_new_context(struct task_struct *tsk,

init_new_context_ldt(mm);
return 0;
  }
+
+static inline void free_pasid(struct mm_struct *mm)
+{
+   if (!IS_ENABLED(CONFIG_INTEL_IOMMU_SVM))
+   return;
+
+   if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
+   return;
+
+   __free_pasid(mm);
+}
+
  static inline void destroy_context(struct mm_struct *mm)
  {
destroy_context_ldt(mm);
+   free_pasid(mm);
  }
  
  extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,

diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 4e775e12ae52..27dc866b8461 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -425,6 +425,53 @@ int intel_svm_unbind_gpasid(struct device *dev, unsigned int pasid)
return ret;
  }
  
+static void free_bind(struct intel_svm *svm, struct intel_svm_dev *sdev,

+ bool new_pasid)
+{
+   if (new_pasid)
+   ioasid_free(svm->pasid);
+   kfree(svm);
+   kfree(sdev);
+}
+
+/*
+ * If this mm already has a PASID, use it. Otherwise allocate a new one.
+ * Let the caller know if a new PASID is allocated via 'new_pasid'.
+ */
+static int alloc_pasid(struct intel_svm *svm, struct mm_struct *mm,
+  unsigned int pasid_max, bool *new_pasid,
+  unsigned int flags)
+{
+   unsigned int pasid;
+
+   *new_pasid = false;
+
+   /*
+* Reuse the PASID if the mm already has a PASID and a private
+* PASID is not requested.
+*/
+   if (mm && mm->pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) {
+   /*
+* Once a PASID is allocated for this mm, the PASID
+* stays with the mm until the mm is dropped. Reuse
+* the PASID which has been already allocated for the
+* mm instead of allocating a new one.
+*/
+   ioasid_set_data(mm->pasid, svm);
+
+   return mm->pasid;
+   }
+
+   /* Allocate a new pasid. Do not use PASID 0, reserved for init PASID. */
+   pasid = ioasid_alloc(NULL, PASID_MIN, pasid_max - 1, svm);
+   if (pasid != INVALID_IOASID) {
+   /* A new pasid is allocated. */
+   *new_pasid = true;
+   }
+
+   return pasid;
+}
+
  /* Caller must hold pasid_mutex, mm reference */
  static int
  intel_svm_bind_mm(struct device *dev, unsigned int flags,
@@ -518,6 +565,8 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
init_rcu_head(&sdev->rcu);
  
  	if (!svm) {

+   bool new_pasid;
+
svm = kzalloc(sizeof(*svm), GFP_KERNEL);
if (!svm) {
ret = -ENOMEM;
@@ -529,12 +578,9 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
if (pasid_max > intel_pasid_max_id)
pasid_max = intel_pasid_max_id;
  
-		/* Do not use PASID 0, reserved for RID to PASID */

-   svm->pasid = ioasid_alloc(NULL, PASID_MIN,
- pasid_max - 1, svm);
+   svm->pasid = alloc_pasid(svm, mm, 

Re: [PATCH v2 04/12] docs: x86: Add documentation for SVA (Shared Virtual Addressing)

2020-06-13 Thread Lu Baolu

Hi Fenghua,

On 2020/6/13 8:41, Fenghua Yu wrote:

From: Ashok Raj 

ENQCMD and Data Streaming Accelerator (DSA) and all of their associated
features are a complicated stack with lots of interconnected pieces.
This documentation provides a big picture overview for all of the
features.

Signed-off-by: Ashok Raj 
Co-developed-by: Fenghua Yu 
Signed-off-by: Fenghua Yu 
Reviewed-by: Tony Luck 
---
v2:
- Fix the doc format and add the doc in toctree (Thomas)
- Modify the doc for better description (Thomas, Tony, Dave)

  Documentation/x86/index.rst |   1 +
  Documentation/x86/sva.rst   | 287 
  2 files changed, 288 insertions(+)
  create mode 100644 Documentation/x86/sva.rst

diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst
index 265d9e9a093b..e5d5ff096685 100644
--- a/Documentation/x86/index.rst
+++ b/Documentation/x86/index.rst
@@ -30,3 +30,4 @@ x86-specific Documentation
 usb-legacy-support
 i386/index
 x86_64/index
+   sva
diff --git a/Documentation/x86/sva.rst b/Documentation/x86/sva.rst
new file mode 100644
index ..1e52208c7dda
--- /dev/null
+++ b/Documentation/x86/sva.rst
@@ -0,0 +1,287 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================================
+Shared Virtual Addressing (SVA) with ENQCMD
+===========================================
+
+Background
+==========
+
+Shared Virtual Addressing (SVA) allows the processor and device to use the
+same virtual addresses avoiding the need for software to translate virtual
+addresses to physical addresses. SVA is what PCIe calls Shared Virtual
+Memory (SVM).
+
+In addition to the convenience of using application virtual addresses
+by the device, it also doesn't require pinning pages for DMA.
+PCIe Address Translation Services (ATS) along with Page Request Interface
+(PRI) allow devices to function much the same way as the CPU handling
+application page-faults. For more information please refer to PCIe
+specification Chapter 10: ATS Specification.
+
+Use of SVA requires IOMMU support in the platform. IOMMU also is required
+to support PCIe features ATS and PRI. ATS allows devices to cache
+translations for the virtual address. IOMMU driver uses the mmu_notifier()
+support to keep the device tlb cache and the CPU cache in sync. PRI allows
+the device to request paging the virtual address before using if they are
+not paged in the CPU page tables.
+
+
+Shared Hardware Workqueues
+==========================
+
+Unlike Single Root I/O Virtualization (SRIOV), Scalable IOV (SIOV) permits
+the use of Shared Work Queues (SWQ) by both applications and Virtual
+Machines (VM's). This allows better hardware utilization vs. hard
+partitioning resources that could result in under utilization. In order to
+allow the hardware to distinguish the context for which work is being
+executed in the hardware by SWQ interface, SIOV uses Process Address Space
+ID (PASID), which is a 20bit number defined by the PCIe SIG.
+
+PASID value is encoded in all transactions from the device. This allows the
+IOMMU to track I/O on a per-PASID granularity in addition to using the PCIe
+Resource Identifier (RID) which is the Bus/Device/Function.
+
+
+ENQCMD
+======
+
+ENQCMD is a new instruction on Intel platforms that atomically submits a
+work descriptor to a device. The descriptor includes the operation to be
+performed, virtual addresses of all parameters, virtual address of a completion
+record, and the PASID (process address space ID) of the current process.
+
+ENQCMD works with non-posted semantics and carries a status back if the
+command was accepted by hardware. This allows the submitter to know if the
+submission needs to be retried or other device specific mechanisms to
+implement implement fairness or ensure forward progress can be made.


Repeated "implement".


+
+ENQCMD is the glue that ensures applications can directly submit commands
+to the hardware and also permit hardware to be aware of application context
+to perform I/O operations via use of PASID.
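
For illustration, a user-space submission loop might look roughly like the
sketch below. Everything in it is an assumption rather than part of this
patch: the _enqcmd() intrinsic from <immintrin.h> (built with -menqcmd), a
portal obtained by mmap()ing the device's shared work queue, and a
device-defined 64-byte descriptor layout.

#include <stdint.h>
#include <immintrin.h>

/* Device-defined 64-byte work descriptor; the layout here is illustrative. */
struct work_desc {
	uint8_t bytes[64];
} __attribute__((aligned(64)));

/*
 * Submit one descriptor through a shared work queue portal. The CPU supplies
 * the PASID for the enqueued command from the IA32_PASID MSR. The intrinsic
 * is assumed to return the ZF result: non-zero means the device did not
 * accept the command and the submission should be retried.
 */
static int submit(void *portal, const struct work_desc *desc)
{
	for (int retries = 0; retries < 1000; retries++) {
		if (!_enqcmd(portal, desc))
			return 0;	/* accepted by the device */
	}
	return -1;	/* queue persistently full: back off or report failure */
}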
+
+Process Address Space Tagging
+=============================
+
+A new thread scoped MSR (IA32_PASID) provides the connection between
+user processes and the rest of the hardware. When an application first
+accesses an SVA capable device this MSR is initialized with a newly
+allocated PASID. The driver for the device calls an IOMMU specific api
+that sets up the routing for DMA and page-requests.
+
+For example, the Intel Data Streaming Accelerator (DSA) uses
+intel_svm_bind_mm(), which will do the following.


The Intel SVM APIs have been deprecated. Drivers should use
iommu_sva_bind_device() instead. Please also update other places in
this document.
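
As a companion to that suggestion, a driver-side sketch with the generic API
(hypothetical function and device pointer; error handling trimmed; the drvdata
argument reflects the API at the time of this series):

#include <linux/err.h>
#include <linux/iommu.h>
#include <linux/sched.h>

/*
 * Bind the current process address space to an SVA-capable device and return
 * the PASID that user space must use in its work descriptors. A real driver
 * would stash 'handle' in its per-file state and release it later with
 * iommu_sva_unbind_device().
 */
static int example_bind_current_mm(struct device *dev)
{
	struct iommu_sva *handle;

	handle = iommu_sva_bind_device(dev, current->mm, NULL);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	return iommu_sva_get_pasid(handle);
}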


+
+- Allocate the PASID, and program the process page-table (cr3) in the PASID
+  context entries.
+- Register for mmu_notifier() to track any page-table invalidations to keep
+  the device tlb in sync. For example, when a page-table entry is invalidated,
+  IOMMU propagates the