RE: [PATCH v2 06/12] iommu/vt-d: Add second level page table interface

2018-09-12 Thread Tian, Kevin
> From: Raj, Ashok
> Sent: Saturday, September 8, 2018 1:43 AM
> 
> On Fri, Sep 07, 2018 at 10:47:11AM +0800, Lu Baolu wrote:
> >
> > >>+
> > >>+ intel_pasid_clear_entry(dev, pasid);
> > >>+
> > >>+ if (!ecap_coherent(iommu->ecap)) {
> > >>+ pte = intel_pasid_get_entry(dev, pasid);
> > >>+ clflush_cache_range(pte, sizeof(*pte));
> > >>+ }
> > >>+
> > >>+ pasid_based_pasid_cache_invalidation(iommu, did, pasid);
> > >>+ pasid_based_iotlb_cache_invalidation(iommu, did, pasid);
> > >>+
> > >>+ /* Device IOTLB doesn't need to be flushed in caching mode. */
> > >>+ if (!cap_caching_mode(iommu->cap))
> > >>+ pasid_based_dev_iotlb_cache_invalidation(iommu, dev,
> > >>pasid);
> > >
> > >can you elaborate, or point to any spec reference?
> > >
> >
> > In the driver, the device iotlb doesn't get flushed in caching mode. I just
> > followed what has been done there.
> >
> > It also makes sense to me since only the bare metal host needs to
> > consider whether and how to flush the device iotlb.
> >
> 
> DavidW might remember; I think the idea was to help with the cost
> of virtualization: we can avoid taking 2 exits by handling it
> directly when we do the iotlb flushing instead.
> 

OK, performance-wise it makes sense, though strictly speaking it
doesn't follow the spec...

Thanks
Kevin
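
For reference, a minimal sketch of the flush sequence under discussion,
built only from the helpers in the quoted patch; the wrapper function and
its name are illustrative, not part of the series. The point is that in
caching mode (a virtualized IOMMU) the host intercepts the guest's
invalidations and can flush the device IOTLB itself, so the guest skips
the extra descriptor and the extra exit:

/*
 * Illustrative wrapper (not in the patch): issue the PASID-cache and
 * IOTLB invalidations unconditionally; flush the device IOTLB only on
 * bare metal, where nobody else will do it for us.
 */
static void flush_pasid_teardown_caches(struct intel_iommu *iommu,
					struct device *dev,
					u16 did, int pasid)
{
	pasid_based_pasid_cache_invalidation(iommu, did, pasid);
	pasid_based_iotlb_cache_invalidation(iommu, did, pasid);

	/* Device IOTLB doesn't need to be flushed in caching mode. */
	if (!cap_caching_mode(iommu->cap))
		pasid_based_dev_iotlb_cache_invalidation(iommu, dev, pasid);
}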


Re: [PATCH v2 06/12] iommu/vt-d: Add second level page table interface

2018-09-07 Thread Raj, Ashok
On Fri, Sep 07, 2018 at 10:47:11AM +0800, Lu Baolu wrote:
> 
> >>+
> >>+   intel_pasid_clear_entry(dev, pasid);
> >>+
> >>+   if (!ecap_coherent(iommu->ecap)) {
> >>+   pte = intel_pasid_get_entry(dev, pasid);
> >>+   clflush_cache_range(pte, sizeof(*pte));
> >>+   }
> >>+
> >>+   pasid_based_pasid_cache_invalidation(iommu, did, pasid);
> >>+   pasid_based_iotlb_cache_invalidation(iommu, did, pasid);
> >>+
> >>+   /* Device IOTLB doesn't need to be flushed in caching mode. */
> >>+   if (!cap_caching_mode(iommu->cap))
> >>+   pasid_based_dev_iotlb_cache_invalidation(iommu, dev,
> >>pasid);
> >
> >can you elaborate, or point to any spec reference?
> >
> 
> In the driver, the device iotlb doesn't get flushed in caching mode. I just
> followed what has been done there.
> 
> It also makes sense to me since only the bare metal host needs to
> consider whether and how to flush the device iotlb.
> 

DavidW might remember; I think the idea was to help with the cost
of virtualization: we can avoid taking 2 exits by handling it
directly when we do the iotlb flushing instead.

The other optimization was to do devtlb flushing only when you unmap,
since when establishing a not-present to present mapping there is no
need to flush the devtlb at that point.
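
A minimal sketch of that second optimization, again illustrative only:
the flush helper and the device_domain_info fields are the ones used in
the quoted patch, while the map/unmap-aware wrapper is hypothetical. On a
not-present to present transition nothing can be cached in the device TLB
yet, so only the unmap path needs the flush:

/* Hypothetical wrapper: flush the device TLB only on unmap. */
static void dev_tlb_flush_on_update(struct intel_iommu *iommu,
				    struct device_domain_info *info,
				    bool unmap)
{
	u16 sid, qdep, pfsid;

	/* map (not-present -> present): nothing stale to flush */
	if (!unmap || !info || !info->ats_enabled)
		return;

	sid = info->bus << 8 | info->devfn;
	qdep = info->ats_qdep;
	pfsid = info->pfsid;
	qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
}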


Re: [PATCH v2 06/12] iommu/vt-d: Add second level page table interface

2018-09-06 Thread Lu Baolu

Hi,

On 09/06/2018 11:11 AM, Tian, Kevin wrote:

From: Lu Baolu [mailto:baolu...@linux.intel.com]
Sent: Thursday, August 30, 2018 9:35 AM

This adds the interfaces to set up or tear down the structures
for second level page table translations. This includes the
second-level-only and pass-through translation types.

Cc: Ashok Raj 
Cc: Jacob Pan 
Cc: Kevin Tian 
Cc: Liu Yi L 
Signed-off-by: Sanjay Kumar 
Signed-off-by: Lu Baolu 
Reviewed-by: Ashok Raj 
---
  drivers/iommu/intel-iommu.c |   2 +-
  drivers/iommu/intel-pasid.c | 246

  drivers/iommu/intel-pasid.h |   7 +
  include/linux/intel-iommu.h |   3 +
  4 files changed, 257 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 562da10bf93e..de6b909bb47a 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -1232,7 +1232,7 @@ static void iommu_set_root_entry(struct
intel_iommu *iommu)
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
  }

-static void iommu_flush_write_buffer(struct intel_iommu *iommu)
+void iommu_flush_write_buffer(struct intel_iommu *iommu)
  {
u32 val;
unsigned long flag;
diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c
index d6e90cd5b062..edcea1d8b9fc 100644
--- a/drivers/iommu/intel-pasid.c
+++ b/drivers/iommu/intel-pasid.c
@@ -9,6 +9,7 @@

  #define pr_fmt(fmt)   "DMAR: " fmt

+#include 
  #include 
  #include 
  #include 
@@ -291,3 +292,248 @@ void intel_pasid_clear_entry(struct device *dev,
int pasid)

pasid_clear_entry(pe);
  }
+
+static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits)
+{
+   u64 old;
+
+   old = READ_ONCE(*ptr);
+   WRITE_ONCE(*ptr, (old & ~mask) | bits);
+}
+
+/*
+ * Setup the DID(Domain Identifier) field (Bit 64~79) of scalable mode
+ * PASID entry.
+ */
+static inline void
+pasid_set_domain_id(struct pasid_entry *pe, u64 value)
+{
+   pasid_set_bits(&pe->val[1], GENMASK_ULL(15, 0), value);
+}
+
+/*
+ * Setup the SLPTPTR(Second Level Page Table Pointer) field (Bit 12~63)
+ * of a scalable mode PASID entry.
+ */
+static inline void
+pasid_set_address_root(struct pasid_entry *pe, u64 value)


is address_root too general? especially when the entry could contain both
1st level and 2nd level pointers.



Yes. Should be changed to a specific name like pasid_set_slpt_ptr().


+{
+   pasid_set_bits(&pe->val[0], VTD_PAGE_MASK, value);
+}
+
+/*
+ * Setup the AW(Address Width) field (Bit 2~4) of a scalable mode PASID
+ * entry.
+ */
+static inline void
+pasid_set_address_width(struct pasid_entry *pe, u64 value)
+{
+   pasid_set_bits(&pe->val[0], GENMASK_ULL(4, 2), value << 2);
+}
+
+/*
+ * Setup the PGTT(PASID Granular Translation Type) field (Bit 6~8)
+ * of a scalable mode PASID entry.
+ */
+static inline void
+pasid_set_translation_type(struct pasid_entry *pe, u64 value)
+{
+   pasid_set_bits(&pe->val[0], GENMASK_ULL(8, 6), value << 6);
+}
+
+/*
+ * Enable fault processing by clearing the FPD(Fault Processing
+ * Disable) field (Bit 1) of a scalable mode PASID entry.
+ */
+static inline void pasid_set_fault_enable(struct pasid_entry *pe)
+{
+   pasid_set_bits(&pe->val[0], 1 << 1, 0);
+}
+
+/*
+ * Setup the SRE(Supervisor Request Enable) field (Bit 128) of a
+ * scalable mode PASID entry.
+ */
+static inline void pasid_set_sre(struct pasid_entry *pe)
+{
+   pasid_set_bits(&pe->val[2], 1 << 0, 1);
+}
+
+/*
+ * Setup the P(Present) field (Bit 0) of a scalable mode PASID
+ * entry.
+ */
+static inline void pasid_set_present(struct pasid_entry *pe)
+{
+   pasid_set_bits(&pe->val[0], 1 << 0, 1);
+}


it's a long list and there could be more in the future. What about
defining some macro to simplify LOC, e.g.

#define PASID_SET(name, i, m, b)				\
static inline void pasid_set_##name(struct pasid_entry *pe)	\
{								\
	pasid_set_bits(&pe->val[i], m, b);			\
}

PASID_SET(present, 0, 1<<0, 1);
PASID_SET(sre, 2, 1<<0, 1);
...



Fair enough. This looks more concise.


+
+/*
+ * Setup Page Walk Snoop bit (Bit 87) of a scalable mode PASID
+ * entry.
+ */
+static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value)
+{
+   pasid_set_bits(&pe->val[1], 1 << 23, value);
+}
+
+static void
+pasid_based_pasid_cache_invalidation(struct intel_iommu *iommu,
+int did, int pasid)


pasid_cache_invalidation_with_pasid


Okay.




+{
+   struct qi_desc desc;
+
+   desc.qw0 = QI_PC_DID(did) | QI_PC_PASID_SEL |
QI_PC_PASID(pasid);
+   desc.qw1 = 0;
+   desc.qw2 = 0;
+   desc.qw3 = 0;
+
+   qi_submit_sync(&desc, iommu);
+}
+
+static void
+pasid_based_iotlb_cache_invalidation(struct intel_iommu *iommu,
+u16 did, u32 pasid)


iotlb_invalidation_with_pasid


Okay.




+{
+   struct qi_desc desc;
+
+   desc.qw0 = QI_EIOTLB_PASID(pasid) | 

RE: [PATCH v2 06/12] iommu/vt-d: Add second level page table interface

2018-09-05 Thread Tian, Kevin
> From: Lu Baolu [mailto:baolu...@linux.intel.com]
> Sent: Thursday, August 30, 2018 9:35 AM
> 
> This adds the interfaces to set up or tear down the structures
> for second level page table translations. This includes the
> second-level-only and pass-through translation types.
> 
> Cc: Ashok Raj 
> Cc: Jacob Pan 
> Cc: Kevin Tian 
> Cc: Liu Yi L 
> Signed-off-by: Sanjay Kumar 
> Signed-off-by: Lu Baolu 
> Reviewed-by: Ashok Raj 
> ---
>  drivers/iommu/intel-iommu.c |   2 +-
>  drivers/iommu/intel-pasid.c | 246
> 
>  drivers/iommu/intel-pasid.h |   7 +
>  include/linux/intel-iommu.h |   3 +
>  4 files changed, 257 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
> index 562da10bf93e..de6b909bb47a 100644
> --- a/drivers/iommu/intel-iommu.c
> +++ b/drivers/iommu/intel-iommu.c
> @@ -1232,7 +1232,7 @@ static void iommu_set_root_entry(struct
> intel_iommu *iommu)
>   raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
>  }
> 
> -static void iommu_flush_write_buffer(struct intel_iommu *iommu)
> +void iommu_flush_write_buffer(struct intel_iommu *iommu)
>  {
>   u32 val;
>   unsigned long flag;
> diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c
> index d6e90cd5b062..edcea1d8b9fc 100644
> --- a/drivers/iommu/intel-pasid.c
> +++ b/drivers/iommu/intel-pasid.c
> @@ -9,6 +9,7 @@
> 
>  #define pr_fmt(fmt)  "DMAR: " fmt
> 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -291,3 +292,248 @@ void intel_pasid_clear_entry(struct device *dev,
> int pasid)
> 
>   pasid_clear_entry(pe);
>  }
> +
> +static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits)
> +{
> + u64 old;
> +
> + old = READ_ONCE(*ptr);
> + WRITE_ONCE(*ptr, (old & ~mask) | bits);
> +}
> +
> +/*
> + * Setup the DID(Domain Identifier) field (Bit 64~79) of scalable mode
> + * PASID entry.
> + */
> +static inline void
> +pasid_set_domain_id(struct pasid_entry *pe, u64 value)
> +{
> + pasid_set_bits(&pe->val[1], GENMASK_ULL(15, 0), value);
> +}
> +
> +/*
> + * Setup the SLPTPTR(Second Level Page Table Pointer) field (Bit 12~63)
> + * of a scalable mode PASID entry.
> + */
> +static inline void
> +pasid_set_address_root(struct pasid_entry *pe, u64 value)

is address_root too general? especially when the entry could contain both
1st level and 2nd level pointers.

> +{
> + pasid_set_bits(&pe->val[0], VTD_PAGE_MASK, value);
> +}
> +
> +/*
> + * Setup the AW(Address Width) field (Bit 2~4) of a scalable mode PASID
> + * entry.
> + */
> +static inline void
> +pasid_set_address_width(struct pasid_entry *pe, u64 value)
> +{
> + pasid_set_bits(&pe->val[0], GENMASK_ULL(4, 2), value << 2);
> +}
> +
> +/*
> + * Setup the PGTT(PASID Granular Translation Type) field (Bit 6~8)
> + * of a scalable mode PASID entry.
> + */
> +static inline void
> +pasid_set_translation_type(struct pasid_entry *pe, u64 value)
> +{
> + pasid_set_bits(&pe->val[0], GENMASK_ULL(8, 6), value << 6);
> +}
> +
> +/*
> + * Enable fault processing by clearing the FPD(Fault Processing
> + * Disable) field (Bit 1) of a scalable mode PASID entry.
> + */
> +static inline void pasid_set_fault_enable(struct pasid_entry *pe)
> +{
> + pasid_set_bits(&pe->val[0], 1 << 1, 0);
> +}
> +
> +/*
> + * Setup the SRE(Supervisor Request Enable) field (Bit 128) of a
> + * scalable mode PASID entry.
> + */
> +static inline void pasid_set_sre(struct pasid_entry *pe)
> +{
> + pasid_set_bits(&pe->val[2], 1 << 0, 1);
> +}
> +
> +/*
> + * Setup the P(Present) field (Bit 0) of a scalable mode PASID
> + * entry.
> + */
> +static inline void pasid_set_present(struct pasid_entry *pe)
> +{
> + pasid_set_bits(&pe->val[0], 1 << 0, 1);
> +}

it's a long list and there could be more in the future. What about
defining some macro to simplify LOC, e.g.

#define PASID_SET(name, i, m, b)				\
static inline void pasid_set_##name(struct pasid_entry *pe)	\
{								\
	pasid_set_bits(&pe->val[i], m, b);			\
}

PASID_SET(present, 0, 1<<0, 1);
PASID_SET(sre, 2, 1<<0, 1);
...

> +
> +/*
> + * Setup Page Walk Snoop bit (Bit 87) of a scalable mode PASID
> + * entry.
> + */
> +static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value)
> +{
> + pasid_set_bits(&pe->val[1], 1 << 23, value);
> +}
> +
> +static void
> +pasid_based_pasid_cache_invalidation(struct intel_iommu *iommu,
> +  int did, int pasid)

pasid_cache_invalidation_with_pasid

> +{
> + struct qi_desc desc;
> +
> + desc.qw0 = QI_PC_DID(did) | QI_PC_PASID_SEL |
> QI_PC_PASID(pasid);
> + desc.qw1 = 0;
> + desc.qw2 = 0;
> + desc.qw3 = 0;
> +
> + qi_submit_sync(&desc, iommu);
> +}
> +
> +static void
> +pasid_based_iotlb_cache_invalidation(struct intel_iommu *iommu,
> +  u16 did, u32 pasid)

iotlb_invalidation_with_pasid

> +{
> + struct 

[PATCH v2 06/12] iommu/vt-d: Add second level page table interface

2018-08-29 Thread Lu Baolu
This adds the interfaces to set up or tear down the structures
for second level page table translations. This includes the
second-level-only and pass-through translation types.

Cc: Ashok Raj 
Cc: Jacob Pan 
Cc: Kevin Tian 
Cc: Liu Yi L 
Signed-off-by: Sanjay Kumar 
Signed-off-by: Lu Baolu 
Reviewed-by: Ashok Raj 
---
 drivers/iommu/intel-iommu.c |   2 +-
 drivers/iommu/intel-pasid.c | 246 
 drivers/iommu/intel-pasid.h |   7 +
 include/linux/intel-iommu.h |   3 +
 4 files changed, 257 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 562da10bf93e..de6b909bb47a 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -1232,7 +1232,7 @@ static void iommu_set_root_entry(struct intel_iommu 
*iommu)
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
 }
 
-static void iommu_flush_write_buffer(struct intel_iommu *iommu)
+void iommu_flush_write_buffer(struct intel_iommu *iommu)
 {
u32 val;
unsigned long flag;
diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c
index d6e90cd5b062..edcea1d8b9fc 100644
--- a/drivers/iommu/intel-pasid.c
+++ b/drivers/iommu/intel-pasid.c
@@ -9,6 +9,7 @@
 
 #define pr_fmt(fmt)"DMAR: " fmt
 
+#include 
 #include 
 #include 
 #include 
@@ -291,3 +292,248 @@ void intel_pasid_clear_entry(struct device *dev, int 
pasid)
 
pasid_clear_entry(pe);
 }
+
+static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits)
+{
+   u64 old;
+
+   old = READ_ONCE(*ptr);
+   WRITE_ONCE(*ptr, (old & ~mask) | bits);
+}
+
+/*
+ * Setup the DID(Domain Identifier) field (Bit 64~79) of scalable mode
+ * PASID entry.
+ */
+static inline void
+pasid_set_domain_id(struct pasid_entry *pe, u64 value)
+{
+   pasid_set_bits(&pe->val[1], GENMASK_ULL(15, 0), value);
+}
+
+/*
+ * Setup the SLPTPTR(Second Level Page Table Pointer) field (Bit 12~63)
+ * of a scalable mode PASID entry.
+ */
+static inline void
+pasid_set_address_root(struct pasid_entry *pe, u64 value)
+{
+   pasid_set_bits(&pe->val[0], VTD_PAGE_MASK, value);
+}
+
+/*
+ * Setup the AW(Address Width) field (Bit 2~4) of a scalable mode PASID
+ * entry.
+ */
+static inline void
+pasid_set_address_width(struct pasid_entry *pe, u64 value)
+{
+   pasid_set_bits(&pe->val[0], GENMASK_ULL(4, 2), value << 2);
+}
+
+/*
+ * Setup the PGTT(PASID Granular Translation Type) field (Bit 6~8)
+ * of a scalable mode PASID entry.
+ */
+static inline void
+pasid_set_translation_type(struct pasid_entry *pe, u64 value)
+{
+   pasid_set_bits(&pe->val[0], GENMASK_ULL(8, 6), value << 6);
+}
+
+/*
+ * Enable fault processing by clearing the FPD(Fault Processing
+ * Disable) field (Bit 1) of a scalable mode PASID entry.
+ */
+static inline void pasid_set_fault_enable(struct pasid_entry *pe)
+{
+   pasid_set_bits(&pe->val[0], 1 << 1, 0);
+}
+
+/*
+ * Setup the SRE(Supervisor Request Enable) field (Bit 128) of a
+ * scalable mode PASID entry.
+ */
+static inline void pasid_set_sre(struct pasid_entry *pe)
+{
+   pasid_set_bits(&pe->val[2], 1 << 0, 1);
+}
+
+/*
+ * Setup the P(Present) field (Bit 0) of a scalable mode PASID
+ * entry.
+ */
+static inline void pasid_set_present(struct pasid_entry *pe)
+{
+   pasid_set_bits(&pe->val[0], 1 << 0, 1);
+}
+
+/*
+ * Setup Page Walk Snoop bit (Bit 87) of a scalable mode PASID
+ * entry.
+ */
+static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value)
+{
+   pasid_set_bits(&pe->val[1], 1 << 23, value);
+}
+
+static void
+pasid_based_pasid_cache_invalidation(struct intel_iommu *iommu,
+int did, int pasid)
+{
+   struct qi_desc desc;
+
+   desc.qw0 = QI_PC_DID(did) | QI_PC_PASID_SEL | QI_PC_PASID(pasid);
+   desc.qw1 = 0;
+   desc.qw2 = 0;
+   desc.qw3 = 0;
+
+   qi_submit_sync(&desc, iommu);
+}
+
+static void
+pasid_based_iotlb_cache_invalidation(struct intel_iommu *iommu,
+u16 did, u32 pasid)
+{
+   struct qi_desc desc;
+
+   desc.qw0 = QI_EIOTLB_PASID(pasid) | QI_EIOTLB_DID(did) |
+   QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE;
+   desc.qw1 = 0;
+   desc.qw2 = 0;
+   desc.qw3 = 0;
+
+   qi_submit_sync(&desc, iommu);
+}
+
+static void
+pasid_based_dev_iotlb_cache_invalidation(struct intel_iommu *iommu,
+struct device *dev, int pasid)
+{
+   struct device_domain_info *info;
+   u16 sid, qdep, pfsid;
+
+   info = dev->archdata.iommu;
+   if (!info || !info->ats_enabled)
+   return;
+
+   sid = info->bus << 8 | info->devfn;
+   qdep = info->ats_qdep;
+   pfsid = info->pfsid;
+
+   qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
+}
+
+static void tear_down_one_pasid_entry(struct intel_iommu *iommu,
+ struct device *dev, u16 did,
+ int pasid)
+{
+