This patch removes any implicit flushing that occurs in the implementation
of map and unmap operations and, instead, adds explicit flushing at the
end of the loops in the iommu_map/unmap() wrapper functions.

Because VT-d currently performs two different types of flush dependent upon
whether a PTE is being modified versus merely added (i.e. replacing a non-
present PTE) an 'iommu_flush_type' enumeration is defined by this patch and
the iommu_ops map method is modified to pass back the type of flush
necessary for the PTE that has been populated. When a higher order mapping
operation is done, the wrapper code performs the 'highest' level of flush
required by the individual iommu_ops method calls, where a 'modified PTE'
flush is deemed to be higher than an 'added PTE' flush. The ARM SMMU
implementation currently performs no implicit flushing and therefore
the modified map method always passes back a flush type of 'none'.

NOTE: The per-cpu 'iommu_dont_flush_iotlb' is respected by the iommu_map()
      wrapper function and therefore this now applies to all IOMMU
      implementations rather than just VT-d. Use of the flag has been added
      to arch_iommu_hwdom_init() so that flushing is now fully elided for
      the hardware domain's initial table population.

Signed-off-by: Paul Durrant <paul.durr...@citrix.com>
---
Cc: Stefano Stabellini <sstabell...@kernel.org>
Cc: Julien Grall <julien.gr...@arm.com>
Cc: Suravee Suthikulpanit <suravee.suthikulpa...@amd.com>
Cc: Brian Woods <brian.wo...@amd.com>
Cc: Jan Beulich <jbeul...@suse.com>
Cc: Kevin Tian <kevin.t...@intel.com>
Cc: Andrew Cooper <andrew.coop...@citrix.com>
Cc: Wei Liu <wei.l...@citrix.com>
Cc: "Roger Pau Monné" <roger....@citrix.com>

This code has only been compile tested for ARM.
---
 xen/drivers/passthrough/amd/iommu_map.c       | 70 ++++++++++++++++-----------
 xen/drivers/passthrough/arm/smmu.c            | 13 +++--
 xen/drivers/passthrough/iommu.c               | 31 ++++++++++--
 xen/drivers/passthrough/vtd/iommu.c           | 26 +++++-----
 xen/drivers/passthrough/x86/iommu.c           | 15 ++++--
 xen/include/asm-x86/hvm/svm/amd-iommu-proto.h |  8 +--
 xen/include/xen/iommu.h                       | 13 ++++-
 7 files changed, 121 insertions(+), 55 deletions(-)

diff --git a/xen/drivers/passthrough/amd/iommu_map.c 
b/xen/drivers/passthrough/amd/iommu_map.c
index c05b042821..6afc412d3f 100644
--- a/xen/drivers/passthrough/amd/iommu_map.c
+++ b/xen/drivers/passthrough/amd/iommu_map.c
@@ -45,13 +45,14 @@ static void clear_iommu_pte_present(unsigned long l1_mfn, 
unsigned long dfn)
     unmap_domain_page(table);
 }
 
-static bool set_iommu_pde_present(uint32_t *pde, unsigned long next_mfn,
-                                  unsigned int next_level,
-                                  bool iw, bool ir)
+static enum iommu_flush_type set_iommu_pde_present(
+    uint32_t *pde, unsigned long next_mfn, unsigned int next_level, bool iw,
+    bool ir)
 {
     uint64_t maddr_next;
     uint32_t addr_lo, addr_hi, entry;
-    bool need_flush = false, old_present;
+    bool old_present;
+    enum iommu_flush_type flush_type = IOMMU_FLUSH_none;
 
     maddr_next = __pfn_to_paddr(next_mfn);
 
@@ -84,7 +85,7 @@ static bool set_iommu_pde_present(uint32_t *pde, unsigned 
long next_mfn,
 
         if ( maddr_old != maddr_next || iw != old_w || ir != old_r ||
              old_level != next_level )
-            need_flush = true;
+            flush_type = IOMMU_FLUSH_modified;
     }
 
     addr_lo = maddr_next & DMA_32BIT_MASK;
@@ -121,24 +122,24 @@ static bool set_iommu_pde_present(uint32_t *pde, unsigned 
long next_mfn,
                          IOMMU_PDE_PRESENT_SHIFT, &entry);
     pde[0] = entry;
 
-    return need_flush;
+    return flush_type;
 }
 
-static bool set_iommu_pte_present(unsigned long pt_mfn, unsigned long dfn,
-                                  unsigned long next_mfn, int pde_level,
-                                  bool iw, bool ir)
+static enum iommu_flush_type set_iommu_pte_present(
+    unsigned long pt_mfn, unsigned long dfn, unsigned long next_mfn,
+    int pde_level, bool iw, bool ir)
 {
     uint64_t *table;
     uint32_t *pde;
-    bool need_flush;
+    enum iommu_flush_type flush_type;
 
     table = map_domain_page(_mfn(pt_mfn));
 
     pde = (uint32_t *)(table + pfn_to_pde_idx(dfn, pde_level));
 
-    need_flush = set_iommu_pde_present(pde, next_mfn, 0, iw, ir);
+    flush_type = set_iommu_pde_present(pde, next_mfn, 0, iw, ir);
     unmap_domain_page(table);
-    return need_flush;
+    return flush_type;
 }
 
 void amd_iommu_set_root_page_table(uint32_t *dte, uint64_t root_ptr,
@@ -522,13 +523,15 @@ static int update_paging_mode(struct domain *d, unsigned 
long dfn)
 }
 
 int amd_iommu_map_page(struct domain *d, dfn_t dfn, mfn_t mfn,
-                       unsigned int flags)
+                       unsigned int flags,
+                       enum iommu_flush_type *flush_type)
 {
-    bool need_flush;
     struct domain_iommu *hd = dom_iommu(d);
     int rc;
     unsigned long pt_mfn[7];
 
+    *flush_type = IOMMU_FLUSH_none;
+
     if ( iommu_use_hap_pt(d) )
         return 0;
 
@@ -570,12 +573,9 @@ int amd_iommu_map_page(struct domain *d, dfn_t dfn, mfn_t 
mfn,
     }
 
     /* Install 4k mapping */
-    need_flush = set_iommu_pte_present(pt_mfn[1], dfn_x(dfn), mfn_x(mfn), 1,
-                                       !!(flags & IOMMUF_writable),
-                                       !!(flags & IOMMUF_readable));
-
-    if ( need_flush )
-        amd_iommu_flush_pages(d, dfn_x(dfn), 0);
+    *flush_type = set_iommu_pte_present(pt_mfn[1], dfn_x(dfn), mfn_x(mfn),
+                                        1, !!(flags & IOMMUF_writable),
+                                        !!(flags & IOMMUF_readable));
 
     spin_unlock(&hd->arch.mapping_lock);
     return 0;
@@ -629,8 +629,6 @@ int amd_iommu_unmap_page(struct domain *d, dfn_t dfn)
     clear_iommu_pte_present(pt_mfn[1], dfn_x(dfn));
 
     spin_unlock(&hd->arch.mapping_lock);
-
-    amd_iommu_flush_pages(d, dfn_x(dfn), 0);
     return 0;
 }
 
@@ -646,8 +644,14 @@ static unsigned long flush_count(dfn_t dfn, unsigned int 
page_count,
 }
 
 int amd_iommu_flush_iotlb_pages(struct domain *d, dfn_t dfn,
-                                unsigned int page_count)
+                                unsigned int page_count,
+                                enum iommu_flush_type flush_type)
 {
+    if ( flush_type == IOMMU_FLUSH_none)
+        return 0;
+
+    ASSERT(flush_type == IOMMU_FLUSH_modified);
+
     /* Match VT-d semantics */
     if ( !page_count || dfn_eq(dfn, INVALID_DFN) ||
          dfn_lt(dfn_add(dfn, page_count), dfn) /* overflow */ )
@@ -687,10 +691,11 @@ int amd_iommu_reserve_domain_unity_map(struct domain 
*domain,
                                        paddr_t phys_addr,
                                        unsigned long size, int iw, int ir)
 {
+    enum iommu_flush_type flush_type = IOMMU_FLUSH_none;
     unsigned long npages, i;
     unsigned long gfn;
     unsigned int flags = !!ir;
-    int rt = 0;
+    int ignored, rt = 0;
 
     if ( iw )
         flags |= IOMMUF_writable;
@@ -700,12 +705,23 @@ int amd_iommu_reserve_domain_unity_map(struct domain 
*domain,
     for ( i = 0; i < npages; i++ )
     {
         unsigned long frame = gfn + i;
+        enum iommu_flush_type this_flush_type;
 
-        rt = amd_iommu_map_page(domain, _dfn(frame), _mfn(frame), flags);
+        rt = amd_iommu_map_page(domain, _dfn(frame), _mfn(frame), flags,
+                                &this_flush_type);
         if ( rt != 0 )
-            return rt;
+            break;
+
+        flush_type = MAX(flush_type, this_flush_type);
     }
-    return 0;
+
+    /*
+     * The underlying implementation is void so the return value is
+     * meaningless and can hence be ignored.
+     */
+    ignored = amd_iommu_flush_iotlb_pages(domain, _dfn(gfn), npages,
+                                          flush_type);
+    return rt;
 }
 
 /* Share p2m table with iommu. */
diff --git a/xen/drivers/passthrough/arm/smmu.c 
b/xen/drivers/passthrough/arm/smmu.c
index 9612c0fddc..25b95d0197 100644
--- a/xen/drivers/passthrough/arm/smmu.c
+++ b/xen/drivers/passthrough/arm/smmu.c
@@ -2534,8 +2534,9 @@ static int __must_check arm_smmu_iotlb_flush_all(struct 
domain *d)
        return 0;
 }
 
-static int __must_check arm_smmu_iotlb_flush(struct domain *d, dfn_t dfn,
-                                             unsigned int page_count)
+static int __must_check arm_smmu_iotlb_flush(
+       struct domain *d, dfn_t dfn, unsigned int page_count,
+       enum iommu_flush_type flush_type)
 {
        /* ARM SMMU v1 doesn't have flush by VMA and VMID */
        return arm_smmu_iotlb_flush_all(d);
@@ -2731,8 +2732,9 @@ static void arm_smmu_iommu_domain_teardown(struct domain 
*d)
        xfree(xen_domain);
 }
 
-static int __must_check arm_smmu_map_page(struct domain *d, dfn_t dfn,
-                                         mfn_t mfn, unsigned int flags)
+static int __must_check arm_smmu_map_page(
+       struct domain *d, dfn_t dfn, mfn_t mfn, unsigned int flags,
+       enum iommu_flush_type *flush_type)
 {
        p2m_type_t t;
 
@@ -2753,6 +2755,9 @@ static int __must_check arm_smmu_map_page(struct domain 
*d, dfn_t dfn,
 
        t = (flags & IOMMUF_writable) ? p2m_iommu_map_rw : p2m_iommu_map_ro;
 
+       /* Flushing is always done explicitly by the P2M code */
+       *flush_type = IOMMU_FLUSH_none;
+
        /*
         * The function guest_physmap_add_entry replaces the current mapping
         * if there is already one...
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index ac62d7f52a..bfd9874034 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -219,6 +219,7 @@ void __hwdom_init iommu_hwdom_init(struct domain *d)
             unsigned long mfn = mfn_x(page_to_mfn(page));
             unsigned long dfn = mfn_to_gmfn(d, mfn);
             unsigned int mapping = IOMMUF_readable;
+            enum iommu_flush_type flush_type;
             int ret;
 
             if ( ((page->u.inuse.type_info & PGT_count_mask) == 0) ||
@@ -227,7 +228,7 @@ void __hwdom_init iommu_hwdom_init(struct domain *d)
                 mapping |= IOMMUF_writable;
 
             ret = hd->platform_ops->map_page(d, _dfn(dfn), _mfn(mfn),
-                                             mapping);
+                                             mapping, &flush_type);
             if ( !rc )
                 rc = ret;
 
@@ -235,6 +236,9 @@ void __hwdom_init iommu_hwdom_init(struct domain *d)
                 process_pending_softirqs();
         }
 
+        if ( !rc && hd->platform_ops->iotlb_flush_all )
+            rc = hd->platform_ops->iotlb_flush_all(d);
+
         if ( rc )
             printk(XENLOG_WARNING "d%d: IOMMU mapping failed: %d\n",
                    d->domain_id, rc);
@@ -308,6 +312,7 @@ int iommu_map(struct domain *d, dfn_t dfn, mfn_t mfn,
               unsigned int page_order, unsigned int flags)
 {
     const struct domain_iommu *hd = dom_iommu(d);
+    enum iommu_flush_type flush_type = IOMMU_FLUSH_none;
     unsigned long i;
     int rc = 0;
 
@@ -319,11 +324,18 @@ int iommu_map(struct domain *d, dfn_t dfn, mfn_t mfn,
 
     for ( i = 0; i < (1ul << page_order); i++ )
     {
+        enum iommu_flush_type this_flush_type;
+        int ignore;
+
         rc = hd->platform_ops->map_page(d, dfn_add(dfn, i),
-                                        mfn_add(mfn, i), flags);
+                                        mfn_add(mfn, i), flags,
+                                        &this_flush_type);
 
         if ( likely(!rc) )
+        {
+            flush_type = MAX(flush_type, this_flush_type);
             continue;
+        }
 
         if ( !d->is_shutting_down && printk_ratelimit() )
             printk(XENLOG_ERR
@@ -336,12 +348,19 @@ int iommu_map(struct domain *d, dfn_t dfn, mfn_t mfn,
             if ( hd->platform_ops->unmap_page(d, dfn_add(dfn, i)) )
                 continue;
 
+        /* Something went wrong so attempt a full flush */
+        ignore = hd->platform_ops->iotlb_flush_all(d);
+
         if ( !is_hardware_domain(d) )
             domain_crash(d);
 
         break;
     }
 
+    if ( hd->platform_ops->iotlb_flush && !this_cpu(iommu_dont_flush_iotlb) )
+        rc = hd->platform_ops->iotlb_flush(d, dfn, (1ul << page_order),
+                                           flush_type);
+
     return rc;
 }
 
@@ -378,6 +397,10 @@ int iommu_unmap(struct domain *d, dfn_t dfn, unsigned int 
page_order)
         }
     }
 
+    if ( hd->platform_ops->iotlb_flush )
+        rc = hd->platform_ops->iotlb_flush(d, dfn, (1ul << page_order),
+                                           IOMMU_FLUSH_modified);
+
     return rc;
 }
 
@@ -417,7 +440,9 @@ int iommu_iotlb_flush(struct domain *d, dfn_t dfn, unsigned 
int page_count)
     if ( !iommu_enabled || !hd->platform_ops || !hd->platform_ops->iotlb_flush 
)
         return 0;
 
-    rc = hd->platform_ops->iotlb_flush(d, dfn, page_count);
+    /* Assume a 'modified' flush is required */
+    rc = hd->platform_ops->iotlb_flush(d, dfn, page_count,
+                                       IOMMU_FLUSH_modified);
     if ( unlikely(rc) )
     {
         if ( !d->is_shutting_down && printk_ratelimit() )
diff --git a/xen/drivers/passthrough/vtd/iommu.c 
b/xen/drivers/passthrough/vtd/iommu.c
index 1601278b07..d547ba3d23 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -631,11 +631,14 @@ static int __must_check iommu_flush_iotlb(struct domain 
*d, dfn_t dfn,
     return rc;
 }
 
-static int __must_check iommu_flush_iotlb_pages(struct domain *d,
-                                                dfn_t dfn,
-                                                unsigned int page_count)
+static int __must_check iommu_flush_iotlb_pages(
+    struct domain *d, dfn_t dfn, unsigned int page_count,
+    enum iommu_flush_type flush_type)
 {
-    return iommu_flush_iotlb(d, dfn, 1, page_count);
+    return (flush_type == IOMMU_FLUSH_none) ?
+           0 :
+           iommu_flush_iotlb(d, dfn, (flush_type == IOMMU_FLUSH_modified),
+                             page_count);
 }
 
 static int __must_check iommu_flush_iotlb_all(struct domain *d)
@@ -674,9 +677,6 @@ static int __must_check dma_pte_clear_one(struct domain 
*domain, u64 addr)
     spin_unlock(&hd->arch.mapping_lock);
     iommu_flush_cache_entry(pte, sizeof(struct dma_pte));
 
-    if ( !this_cpu(iommu_dont_flush_iotlb) )
-        rc = iommu_flush_iotlb_pages(domain, daddr_to_dfn(addr), 1);
-
     unmap_vtd_domain_page(page);
 
     return rc;
@@ -1771,15 +1771,17 @@ static void iommu_domain_teardown(struct domain *d)
     spin_unlock(&hd->arch.mapping_lock);
 }
 
-static int __must_check intel_iommu_map_page(struct domain *d,
-                                             dfn_t dfn, mfn_t mfn,
-                                             unsigned int flags)
+static int __must_check intel_iommu_map_page(
+    struct domain *d, dfn_t dfn, mfn_t mfn, unsigned int flags,
+    enum iommu_flush_type *flush_type)
 {
     struct domain_iommu *hd = dom_iommu(d);
     struct dma_pte *page, *pte, old, new = {};
     u64 pg_maddr;
     int rc = 0;
 
+    *flush_type = IOMMU_FLUSH_none;
+
     /* Do nothing if VT-d shares EPT page table */
     if ( iommu_use_hap_pt(d) )
         return 0;
@@ -1823,8 +1825,8 @@ static int __must_check intel_iommu_map_page(struct 
domain *d,
     spin_unlock(&hd->arch.mapping_lock);
     unmap_vtd_domain_page(page);
 
-    if ( !this_cpu(iommu_dont_flush_iotlb) )
-        rc = iommu_flush_iotlb(d, dfn, dma_pte_present(old), 1);
+    *flush_type = dma_pte_present(old) ?
+        IOMMU_FLUSH_modified : IOMMU_FLUSH_added;
 
     return rc;
 }
diff --git a/xen/drivers/passthrough/x86/iommu.c 
b/xen/drivers/passthrough/x86/iommu.c
index c68a72279d..04b6330336 100644
--- a/xen/drivers/passthrough/x86/iommu.c
+++ b/xen/drivers/passthrough/x86/iommu.c
@@ -50,7 +50,6 @@ int arch_iommu_populate_page_table(struct domain *d)
     struct page_info *page;
     int rc = 0, n = 0;
 
-    this_cpu(iommu_dont_flush_iotlb) = 1;
     spin_lock(&d->page_alloc_lock);
 
     if ( unlikely(d->is_dying) )
@@ -63,6 +62,7 @@ int arch_iommu_populate_page_table(struct domain *d)
         {
             unsigned long mfn = mfn_x(page_to_mfn(page));
             unsigned long gfn = mfn_to_gmfn(d, mfn);
+            enum iommu_flush_type flush_type;
 
             if ( gfn != gfn_x(INVALID_GFN) )
             {
@@ -70,7 +70,8 @@ int arch_iommu_populate_page_table(struct domain *d)
                 BUG_ON(SHARED_M2P(gfn));
                 rc = hd->platform_ops->map_page(d, _dfn(gfn), _mfn(mfn),
                                                 IOMMUF_readable |
-                                                IOMMUF_writable);
+                                                IOMMUF_writable,
+                                                &flush_type);
             }
             if ( rc )
             {
@@ -104,7 +105,6 @@ int arch_iommu_populate_page_table(struct domain *d)
     }
 
     spin_unlock(&d->page_alloc_lock);
-    this_cpu(iommu_dont_flush_iotlb) = 0;
 
     if ( !rc )
         rc = iommu_iotlb_flush_all(d);
@@ -207,6 +207,7 @@ static bool __hwdom_init hwdom_iommu_map(const struct 
domain *d,
 void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
 {
     unsigned long i, top, max_pfn;
+    int rc;
 
     BUG_ON(!is_hardware_domain(d));
 
@@ -230,10 +231,10 @@ void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
     max_pfn = (GB(4) >> PAGE_SHIFT) - 1;
     top = max(max_pdx, pfn_to_pdx(max_pfn) + 1);
 
+    this_cpu(iommu_dont_flush_iotlb) = 1;
     for ( i = 0; i < top; i++ )
     {
         unsigned long pfn = pdx_to_pfn(i);
-        int rc;
 
         if ( !hwdom_iommu_map(d, pfn, max_pfn) )
             continue;
@@ -250,6 +251,12 @@ void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
         if (!(i & 0xfffff))
             process_pending_softirqs();
     }
+    this_cpu(iommu_dont_flush_iotlb) = 0;
+
+    rc = iommu_iotlb_flush_all(d);
+    if ( rc )
+        printk(XENLOG_WARNING " d%d: IOMMU flush failed: %d\n",
+               d->domain_id, rc);
 }
 
 /*
diff --git a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h 
b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
index 88715329ca..5d86cf9850 100644
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -53,15 +53,17 @@ int amd_iommu_update_ivrs_mapping_acpi(void);
 
 /* mapping functions */
 int __must_check amd_iommu_map_page(struct domain *d, dfn_t dfn,
-                                    mfn_t mfn, unsigned int flags);
+                                    mfn_t mfn, unsigned int flags,
+                                    enum iommu_flush_type *flush_type);
 int __must_check amd_iommu_unmap_page(struct domain *d, dfn_t dfn);
 uint64_t amd_iommu_get_address_from_pte(void *entry);
 int __must_check amd_iommu_alloc_root(struct domain_iommu *hd);
 int amd_iommu_reserve_domain_unity_map(struct domain *domain,
                                        paddr_t phys_addr, unsigned long size,
                                        int iw, int ir);
-int __must_check amd_iommu_flush_iotlb_pages(struct domain *d, dfn_t dfn,
-                                             unsigned int page_count);
+int __must_check amd_iommu_flush_iotlb_pages(
+    struct domain *d, dfn_t dfn, unsigned int page_count,
+    enum iommu_flush_type flush_type);
 int __must_check amd_iommu_flush_iotlb_all(struct domain *d);
 
 /* Share p2m table with iommu */
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index da8294bac8..289e0e2772 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -155,6 +155,13 @@ struct page_info;
  */
 typedef int iommu_grdm_t(xen_pfn_t start, xen_ulong_t nr, u32 id, void *ctxt);
 
+enum iommu_flush_type
+{
+    IOMMU_FLUSH_none, /* no flush required */
+    IOMMU_FLUSH_added, /* no modified entries, just additional entries */
+    IOMMU_FLUSH_modified, /* modified entries */
+};
+
 struct iommu_ops {
     int (*init)(struct domain *d);
     void (*hwdom_init)(struct domain *d);
@@ -177,7 +184,8 @@ struct iommu_ops {
      * other by the caller in order to have meaningful results.
      */
     int __must_check (*map_page)(struct domain *d, dfn_t dfn, mfn_t mfn,
-                                 unsigned int flags);
+                                 unsigned int flags,
+                                 enum iommu_flush_type *flush_type);
     int __must_check (*unmap_page)(struct domain *d, dfn_t dfn);
     int __must_check (*lookup_page)(struct domain *d, dfn_t dfn, mfn_t *mfn,
                                     unsigned int *flags);
@@ -193,7 +201,8 @@ struct iommu_ops {
     void (*share_p2m)(struct domain *d);
     void (*crash_shutdown)(void);
     int __must_check (*iotlb_flush)(struct domain *d, dfn_t dfn,
-                                    unsigned int page_count);
+                                    unsigned int page_count,
+                                    enum iommu_flush_type flush_type);
     int __must_check (*iotlb_flush_all)(struct domain *d);
     int (*get_reserved_device_memory)(iommu_grdm_t *, void *);
     void (*dump_p2m_table)(struct domain *d);
-- 
2.11.0


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Reply via email to