This patch removes any implicit flushing that occurs in the implementation of map and unmap operations and, instead, adds explicit flushing at the end of the loops in the iommu_map/unmap() wrapper functions.
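
For illustration only (not part of the patch), here is a minimal standalone
sketch of the wrapper behaviour described above: the per-page map operations
no longer flush implicitly; instead the loop records the strongest flush
requirement reported back and a single explicit flush is issued once the loop
completes. The enumeration values anticipate the 'iommu_flush_type' described
below; map_one_page() is a hypothetical stand-in for an iommu_ops map method.

/* Toy model of the deferred-flush loop; not Xen code. */
#include <stdio.h>

enum iommu_flush_type {
    IOMMU_FLUSH_none,     /* no flush required */
    IOMMU_FLUSH_added,    /* only previously non-present PTEs were written */
    IOMMU_FLUSH_modified, /* at least one present PTE was changed */
};

#define MAX(a, b) ((a) > (b) ? (a) : (b))

/* Pretend odd pages replace existing PTEs and even pages add new ones. */
static int map_one_page(unsigned long dfn, enum iommu_flush_type *flush_type)
{
    *flush_type = (dfn % 2) ? IOMMU_FLUSH_modified : IOMMU_FLUSH_added;
    return 0;
}

int main(void)
{
    enum iommu_flush_type flush_type = IOMMU_FLUSH_none;
    unsigned long dfn, nr = 1ul << 3; /* e.g. an order-3 mapping */

    for ( dfn = 0; dfn < nr; dfn++ )
    {
        enum iommu_flush_type this_flush_type;

        if ( map_one_page(dfn, &this_flush_type) )
            return 1;

        /* Keep only the 'highest' flush requirement seen so far. */
        flush_type = MAX(flush_type, this_flush_type);
    }

    /* One explicit flush at the end replaces the per-page implicit flushes. */
    printf("flush needed for %lu pages: %s\n", nr,
           flush_type == IOMMU_FLUSH_none ? "none" :
           flush_type == IOMMU_FLUSH_added ? "added" : "modified");

    return 0;
}
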
Because VT-d currently performs two different types of flush, dependent upon
whether a PTE is being modified or merely added (i.e. replacing a non-present
PTE), this patch defines an 'iommu_flush_type' enumeration and modifies the
iommu_ops map method to pass back the type of flush necessary for the PTE
that has been populated. When a higher order mapping operation is done, the
wrapper code performs the 'highest' level of flush required by the individual
iommu_ops method calls, where a 'modified PTE' flush is deemed to be higher
than an 'added PTE' flush. The ARM SMMU implementation currently performs no
implicit flushing, and therefore the modified map method always passes back a
flush type of 'none'.

NOTE: The per-cpu 'iommu_dont_flush_iotlb' flag is respected by the
iommu_map() wrapper function and therefore now applies to all IOMMU
implementations rather than just VT-d. Use of the flag has also been added to
arch_iommu_hwdom_init() so that flushing is fully elided for the hardware
domain's initial table population.

Signed-off-by: Paul Durrant <paul.durr...@citrix.com>
---
Cc: Stefano Stabellini <sstabell...@kernel.org>
Cc: Julien Grall <julien.gr...@arm.com>
Cc: Suravee Suthikulpanit <suravee.suthikulpa...@amd.com>
Cc: Brian Woods <brian.wo...@amd.com>
Cc: Jan Beulich <jbeul...@suse.com>
Cc: Kevin Tian <kevin.t...@intel.com>
Cc: Andrew Cooper <andrew.coop...@citrix.com>
Cc: Wei Liu <wei.l...@citrix.com>
Cc: "Roger Pau Monné" <roger....@citrix.com>

This code has only been compile-tested for ARM.
---
 xen/drivers/passthrough/amd/iommu_map.c       | 70 ++++++++++++++++-----------
 xen/drivers/passthrough/arm/smmu.c            | 13 +++--
 xen/drivers/passthrough/iommu.c               | 31 ++++++++++--
 xen/drivers/passthrough/vtd/iommu.c           | 26 +++++-----
 xen/drivers/passthrough/x86/iommu.c           | 15 ++++--
 xen/include/asm-x86/hvm/svm/amd-iommu-proto.h |  8 +--
 xen/include/xen/iommu.h                       | 13 ++++-
 7 files changed, 121 insertions(+), 55 deletions(-)
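
As a further illustration (again not part of the patch), a sketch of the
provider side of the scheme: a PTE write is classified as needing an 'added'
or a 'modified' flush depending on whether the old entry was present,
mirroring the VT-d change below, and the flush handler can then skip the
invalidation entirely for IOMMU_FLUSH_none. pte_present(), write_pte() and
flush_iotlb_range() are hypothetical helpers used only for this sketch.

#include <stdbool.h>
#include <stdio.h>

enum iommu_flush_type {
    IOMMU_FLUSH_none,
    IOMMU_FLUSH_added,
    IOMMU_FLUSH_modified,
};

/* Treat bit 0 as the 'present' bit of this toy PTE format. */
static bool pte_present(unsigned long pte)
{
    return pte & 1;
}

/* Replacing a present PTE needs a 'modified' flush; filling a non-present
 * one only needs an 'added' flush. */
static enum iommu_flush_type write_pte(unsigned long *pte, unsigned long new_pte)
{
    enum iommu_flush_type flush_type =
        pte_present(*pte) ? IOMMU_FLUSH_modified : IOMMU_FLUSH_added;

    *pte = new_pte;
    return flush_type;
}

static int flush_iotlb_range(enum iommu_flush_type flush_type)
{
    if ( flush_type == IOMMU_FLUSH_none )
        return 0; /* nothing was written, so no invalidation is required */

    printf("flushing (%s entries)\n",
           flush_type == IOMMU_FLUSH_modified ? "modified" : "added");
    return 0;
}

int main(void)
{
    unsigned long pte = 0;

    flush_iotlb_range(write_pte(&pte, 0x1000 | 1)); /* added */
    flush_iotlb_range(write_pte(&pte, 0x2000 | 1)); /* modified */

    return 0;
}
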
diff --git a/xen/drivers/passthrough/amd/iommu_map.c b/xen/drivers/passthrough/amd/iommu_map.c
index c05b042821..6afc412d3f 100644
--- a/xen/drivers/passthrough/amd/iommu_map.c
+++ b/xen/drivers/passthrough/amd/iommu_map.c
@@ -45,13 +45,14 @@ static void clear_iommu_pte_present(unsigned long l1_mfn, unsigned long dfn)
     unmap_domain_page(table);
 }
 
-static bool set_iommu_pde_present(uint32_t *pde, unsigned long next_mfn,
-                                  unsigned int next_level,
-                                  bool iw, bool ir)
+static enum iommu_flush_type set_iommu_pde_present(
+    uint32_t *pde, unsigned long next_mfn, unsigned int next_level, bool iw,
+    bool ir)
 {
     uint64_t maddr_next;
     uint32_t addr_lo, addr_hi, entry;
-    bool need_flush = false, old_present;
+    bool old_present;
+    enum iommu_flush_type flush_type = IOMMU_FLUSH_none;
 
     maddr_next = __pfn_to_paddr(next_mfn);
 
@@ -84,7 +85,7 @@ static bool set_iommu_pde_present(uint32_t *pde, unsigned long next_mfn,
 
         if ( maddr_old != maddr_next || iw != old_w || ir != old_r ||
              old_level != next_level )
-            need_flush = true;
+            flush_type = IOMMU_FLUSH_modified;
     }
 
     addr_lo = maddr_next & DMA_32BIT_MASK;
@@ -121,24 +122,24 @@ static bool set_iommu_pde_present(uint32_t *pde, unsigned long next_mfn,
                          IOMMU_PDE_PRESENT_SHIFT, &entry);
     pde[0] = entry;
 
-    return need_flush;
+    return flush_type;
 }
 
-static bool set_iommu_pte_present(unsigned long pt_mfn, unsigned long dfn,
-                                  unsigned long next_mfn, int pde_level,
-                                  bool iw, bool ir)
+static enum iommu_flush_type set_iommu_pte_present(
+    unsigned long pt_mfn, unsigned long dfn, unsigned long next_mfn,
+    int pde_level, bool iw, bool ir)
 {
     uint64_t *table;
     uint32_t *pde;
-    bool need_flush;
+    enum iommu_flush_type flush_type;
 
     table = map_domain_page(_mfn(pt_mfn));
     pde = (uint32_t *)(table + pfn_to_pde_idx(dfn, pde_level));
 
-    need_flush = set_iommu_pde_present(pde, next_mfn, 0, iw, ir);
+    flush_type = set_iommu_pde_present(pde, next_mfn, 0, iw, ir);
     unmap_domain_page(table);
 
-    return need_flush;
+    return flush_type;
 }
 
 void amd_iommu_set_root_page_table(uint32_t *dte, uint64_t root_ptr,
@@ -522,13 +523,15 @@ static int update_paging_mode(struct domain *d, unsigned long dfn)
 }
 
 int amd_iommu_map_page(struct domain *d, dfn_t dfn, mfn_t mfn,
-                       unsigned int flags)
+                       unsigned int flags,
+                       enum iommu_flush_type *flush_type)
 {
-    bool need_flush;
     struct domain_iommu *hd = dom_iommu(d);
    int rc;
     unsigned long pt_mfn[7];
 
+    *flush_type = IOMMU_FLUSH_none;
+
     if ( iommu_use_hap_pt(d) )
         return 0;
 
@@ -570,12 +573,9 @@ int amd_iommu_map_page(struct domain *d, dfn_t dfn, mfn_t mfn,
     }
 
     /* Install 4k mapping */
-    need_flush = set_iommu_pte_present(pt_mfn[1], dfn_x(dfn), mfn_x(mfn), 1,
-                                       !!(flags & IOMMUF_writable),
-                                       !!(flags & IOMMUF_readable));
-
-    if ( need_flush )
-        amd_iommu_flush_pages(d, dfn_x(dfn), 0);
+    *flush_type = set_iommu_pte_present(pt_mfn[1], dfn_x(dfn), mfn_x(mfn),
+                                        1, !!(flags & IOMMUF_writable),
+                                        !!(flags & IOMMUF_readable));
 
     spin_unlock(&hd->arch.mapping_lock);
     return 0;
@@ -629,8 +629,6 @@ int amd_iommu_unmap_page(struct domain *d, dfn_t dfn)
 
     clear_iommu_pte_present(pt_mfn[1], dfn_x(dfn));
     spin_unlock(&hd->arch.mapping_lock);
-
-    amd_iommu_flush_pages(d, dfn_x(dfn), 0);
 
     return 0;
 }
@@ -646,8 +644,14 @@ static unsigned long flush_count(dfn_t dfn, unsigned int page_count,
 }
 
 int amd_iommu_flush_iotlb_pages(struct domain *d, dfn_t dfn,
-                                unsigned int page_count)
+                                unsigned int page_count,
+                                enum iommu_flush_type flush_type)
 {
+    if ( flush_type == IOMMU_FLUSH_none )
+        return 0;
+
+    ASSERT(flush_type == IOMMU_FLUSH_modified);
+
     /* Match VT-d semantics */
     if ( !page_count || dfn_eq(dfn, INVALID_DFN) ||
          dfn_lt(dfn_add(dfn, page_count), dfn) /* overflow */ )
@@ -687,10 +691,11 @@ int amd_iommu_reserve_domain_unity_map(struct domain *domain,
                                        paddr_t phys_addr,
                                        unsigned long size, int iw, int ir)
 {
+    enum iommu_flush_type flush_type = IOMMU_FLUSH_none;
     unsigned long npages, i;
     unsigned long gfn;
     unsigned int flags = !!ir;
-    int rt = 0;
+    int ignored, rt = 0;
 
     if ( iw )
         flags |= IOMMUF_writable;
@@ -700,12 +705,23 @@ int amd_iommu_reserve_domain_unity_map(struct domain *domain,
     for ( i = 0; i < npages; i++ )
     {
         unsigned long frame = gfn + i;
+        enum iommu_flush_type this_flush_type;
 
-        rt = amd_iommu_map_page(domain, _dfn(frame), _mfn(frame), flags);
+        rt = amd_iommu_map_page(domain, _dfn(frame), _mfn(frame), flags,
+                                &this_flush_type);
         if ( rt != 0 )
-            return rt;
+            break;
+
+        flush_type = MAX(flush_type, this_flush_type);
     }
-    return 0;
+
+    /*
+     * The underlying implementation is void so the return value is
+     * meaningless and can hence be ignored.
+     */
+    ignored = amd_iommu_flush_iotlb_pages(domain, _dfn(gfn), npages,
+                                          flush_type);
+    return rt;
 }
 
 /* Share p2m table with iommu. */
diff --git a/xen/drivers/passthrough/arm/smmu.c b/xen/drivers/passthrough/arm/smmu.c
index 9612c0fddc..25b95d0197 100644
--- a/xen/drivers/passthrough/arm/smmu.c
+++ b/xen/drivers/passthrough/arm/smmu.c
@@ -2534,8 +2534,9 @@ static int __must_check arm_smmu_iotlb_flush_all(struct domain *d)
 	return 0;
 }
 
-static int __must_check arm_smmu_iotlb_flush(struct domain *d, dfn_t dfn,
-					     unsigned int page_count)
+static int __must_check arm_smmu_iotlb_flush(
+	struct domain *d, dfn_t dfn, unsigned int page_count,
+	enum iommu_flush_type flush_type)
 {
 	/* ARM SMMU v1 doesn't have flush by VMA and VMID */
 	return arm_smmu_iotlb_flush_all(d);
@@ -2731,8 +2732,9 @@ static void arm_smmu_iommu_domain_teardown(struct domain *d)
 	xfree(xen_domain);
 }
 
-static int __must_check arm_smmu_map_page(struct domain *d, dfn_t dfn,
-					  mfn_t mfn, unsigned int flags)
+static int __must_check arm_smmu_map_page(
+	struct domain *d, dfn_t dfn, mfn_t mfn, unsigned int flags,
+	enum iommu_flush_type *flush_type)
 {
 	p2m_type_t t;
 
@@ -2753,6 +2755,9 @@ static int __must_check arm_smmu_map_page(struct domain *d, dfn_t dfn,
 
 	t = (flags & IOMMUF_writable) ? p2m_iommu_map_rw : p2m_iommu_map_ro;
 
+	/* Flushing is always done explicitly by the P2M code */
+	*flush_type = IOMMU_FLUSH_none;
+
 	/*
 	 * The function guest_physmap_add_entry replaces the current mapping
 	 * if there is already one...
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index ac62d7f52a..bfd9874034 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -219,6 +219,7 @@ void __hwdom_init iommu_hwdom_init(struct domain *d)
             unsigned long mfn = mfn_x(page_to_mfn(page));
             unsigned long dfn = mfn_to_gmfn(d, mfn);
             unsigned int mapping = IOMMUF_readable;
+            enum iommu_flush_type flush_type;
             int ret;
 
             if ( ((page->u.inuse.type_info & PGT_count_mask) == 0) ||
@@ -227,7 +228,7 @@ void __hwdom_init iommu_hwdom_init(struct domain *d)
                 mapping |= IOMMUF_writable;
 
             ret = hd->platform_ops->map_page(d, _dfn(dfn), _mfn(mfn),
-                                             mapping);
+                                             mapping, &flush_type);
             if ( !rc )
                 rc = ret;
 
@@ -235,6 +236,9 @@ void __hwdom_init iommu_hwdom_init(struct domain *d)
                 process_pending_softirqs();
         }
 
+        if ( !rc && hd->platform_ops->iotlb_flush_all )
+            rc = hd->platform_ops->iotlb_flush_all(d);
+
         if ( rc )
             printk(XENLOG_WARNING "d%d: IOMMU mapping failed: %d\n",
                    d->domain_id, rc);
@@ -308,6 +312,7 @@ int iommu_map(struct domain *d, dfn_t dfn, mfn_t mfn,
               unsigned int page_order, unsigned int flags)
 {
     const struct domain_iommu *hd = dom_iommu(d);
+    enum iommu_flush_type flush_type = IOMMU_FLUSH_none;
     unsigned long i;
     int rc = 0;
 
@@ -319,11 +324,18 @@ int iommu_map(struct domain *d, dfn_t dfn, mfn_t mfn,
 
     for ( i = 0; i < (1ul << page_order); i++ )
     {
+        enum iommu_flush_type this_flush_type;
+        int ignore;
+
         rc = hd->platform_ops->map_page(d, dfn_add(dfn, i),
-                                        mfn_add(mfn, i), flags);
+                                        mfn_add(mfn, i), flags,
+                                        &this_flush_type);
 
         if ( likely(!rc) )
+        {
+            flush_type = MAX(flush_type, this_flush_type);
             continue;
+        }
 
         if ( !d->is_shutting_down && printk_ratelimit() )
             printk(XENLOG_ERR
@@ -336,12 +348,19 @@ int iommu_map(struct domain *d, dfn_t dfn, mfn_t mfn,
         if ( hd->platform_ops->unmap_page(d, dfn_add(dfn, i)) )
             continue;
 
+        /* Something went wrong so attempt a full flush */
+        ignore = hd->platform_ops->iotlb_flush_all(d);
+
         if ( !is_hardware_domain(d) )
            domain_crash(d);
 
         break;
     }
 
+    if ( hd->platform_ops->iotlb_flush && !this_cpu(iommu_dont_flush_iotlb) )
+        rc = hd->platform_ops->iotlb_flush(d, dfn, (1ul << page_order),
+                                           flush_type);
+
     return rc;
 }
 
@@ -378,6 +397,10 @@ int iommu_unmap(struct domain *d, dfn_t dfn, unsigned int page_order)
         }
     }
 
+    if ( hd->platform_ops->iotlb_flush )
+        rc = hd->platform_ops->iotlb_flush(d, dfn, (1ul << page_order),
+                                           IOMMU_FLUSH_modified);
+
     return rc;
 }
 
@@ -417,7 +440,9 @@ int iommu_iotlb_flush(struct domain *d, dfn_t dfn, unsigned int page_count)
     if ( !iommu_enabled || !hd->platform_ops || !hd->platform_ops->iotlb_flush )
         return 0;
 
-    rc = hd->platform_ops->iotlb_flush(d, dfn, page_count);
+    /* Assume a 'modified' flush is required */
+    rc = hd->platform_ops->iotlb_flush(d, dfn, page_count,
+                                       IOMMU_FLUSH_modified);
     if ( unlikely(rc) )
     {
         if ( !d->is_shutting_down && printk_ratelimit() )
diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
index 1601278b07..d547ba3d23 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -631,11 +631,14 @@ static int __must_check iommu_flush_iotlb(struct domain *d, dfn_t dfn,
     return rc;
 }
 
-static int __must_check iommu_flush_iotlb_pages(struct domain *d,
-                                                dfn_t dfn,
-                                                unsigned int page_count)
+static int __must_check iommu_flush_iotlb_pages(
+    struct domain *d, dfn_t dfn, unsigned int page_count,
+    enum iommu_flush_type flush_type)
 {
-    return iommu_flush_iotlb(d, dfn, 1, page_count);
+    return (flush_type == IOMMU_FLUSH_none) ?
+           0 :
+           iommu_flush_iotlb(d, dfn, (flush_type == IOMMU_FLUSH_modified),
+                             page_count);
 }
 
 static int __must_check iommu_flush_iotlb_all(struct domain *d)
@@ -674,9 +677,6 @@ static int __must_check dma_pte_clear_one(struct domain *domain, u64 addr)
     spin_unlock(&hd->arch.mapping_lock);
     iommu_flush_cache_entry(pte, sizeof(struct dma_pte));
 
-    if ( !this_cpu(iommu_dont_flush_iotlb) )
-        rc = iommu_flush_iotlb_pages(domain, daddr_to_dfn(addr), 1);
-
     unmap_vtd_domain_page(page);
 
     return rc;
@@ -1771,15 +1771,17 @@ static void iommu_domain_teardown(struct domain *d)
     spin_unlock(&hd->arch.mapping_lock);
 }
 
-static int __must_check intel_iommu_map_page(struct domain *d,
-                                             dfn_t dfn, mfn_t mfn,
-                                             unsigned int flags)
+static int __must_check intel_iommu_map_page(
+    struct domain *d, dfn_t dfn, mfn_t mfn, unsigned int flags,
+    enum iommu_flush_type *flush_type)
 {
     struct domain_iommu *hd = dom_iommu(d);
     struct dma_pte *page, *pte, old, new = {};
     u64 pg_maddr;
     int rc = 0;
 
+    *flush_type = IOMMU_FLUSH_none;
+
     /* Do nothing if VT-d shares EPT page table */
     if ( iommu_use_hap_pt(d) )
         return 0;
@@ -1823,8 +1825,8 @@ static int __must_check intel_iommu_map_page(struct domain *d,
     spin_unlock(&hd->arch.mapping_lock);
     unmap_vtd_domain_page(page);
 
-    if ( !this_cpu(iommu_dont_flush_iotlb) )
-        rc = iommu_flush_iotlb(d, dfn, dma_pte_present(old), 1);
+    *flush_type = dma_pte_present(old) ?
+                  IOMMU_FLUSH_modified : IOMMU_FLUSH_added;
 
     return rc;
 }
diff --git a/xen/drivers/passthrough/x86/iommu.c b/xen/drivers/passthrough/x86/iommu.c
index c68a72279d..04b6330336 100644
--- a/xen/drivers/passthrough/x86/iommu.c
+++ b/xen/drivers/passthrough/x86/iommu.c
@@ -50,7 +50,6 @@ int arch_iommu_populate_page_table(struct domain *d)
     struct page_info *page;
     int rc = 0, n = 0;
 
-    this_cpu(iommu_dont_flush_iotlb) = 1;
     spin_lock(&d->page_alloc_lock);
 
     if ( unlikely(d->is_dying) )
@@ -63,6 +62,7 @@ int arch_iommu_populate_page_table(struct domain *d)
         {
             unsigned long mfn = mfn_x(page_to_mfn(page));
             unsigned long gfn = mfn_to_gmfn(d, mfn);
+            enum iommu_flush_type flush_type;
 
             if ( gfn != gfn_x(INVALID_GFN) )
             {
@@ -70,7 +70,8 @@ int arch_iommu_populate_page_table(struct domain *d)
                 BUG_ON(SHARED_M2P(gfn));
                 rc = hd->platform_ops->map_page(d, _dfn(gfn), _mfn(mfn),
                                                 IOMMUF_readable |
-                                                IOMMUF_writable);
+                                                IOMMUF_writable,
+                                                &flush_type);
             }
             if ( rc )
             {
@@ -104,7 +105,6 @@ int arch_iommu_populate_page_table(struct domain *d)
     }
 
     spin_unlock(&d->page_alloc_lock);
-    this_cpu(iommu_dont_flush_iotlb) = 0;
 
     if ( !rc )
         rc = iommu_iotlb_flush_all(d);
@@ -207,6 +207,7 @@ static bool __hwdom_init hwdom_iommu_map(const struct domain *d,
 void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
 {
     unsigned long i, top, max_pfn;
+    int rc;
 
     BUG_ON(!is_hardware_domain(d));
 
@@ -230,10 +231,10 @@ void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
     max_pfn = (GB(4) >> PAGE_SHIFT) - 1;
     top = max(max_pdx, pfn_to_pdx(max_pfn) + 1);
 
+    this_cpu(iommu_dont_flush_iotlb) = 1;
     for ( i = 0; i < top; i++ )
     {
         unsigned long pfn = pdx_to_pfn(i);
-        int rc;
 
         if ( !hwdom_iommu_map(d, pfn, max_pfn) )
             continue;
@@ -250,6 +251,12 @@ void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
         if (!(i & 0xfffff))
             process_pending_softirqs();
     }
+    this_cpu(iommu_dont_flush_iotlb) = 0;
+
+    rc = iommu_iotlb_flush_all(d);
+    if ( rc )
+        printk(XENLOG_WARNING "d%d: IOMMU flush failed: %d\n",
+               d->domain_id, rc);
 }
 
 /*
diff --git a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
index 88715329ca..5d86cf9850 100644
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -53,15 +53,17 @@ int amd_iommu_update_ivrs_mapping_acpi(void);
 
 /* mapping functions */
 int __must_check amd_iommu_map_page(struct domain *d, dfn_t dfn,
-                                    mfn_t mfn, unsigned int flags);
+                                    mfn_t mfn, unsigned int flags,
+                                    enum iommu_flush_type *flush_type);
 int __must_check amd_iommu_unmap_page(struct domain *d, dfn_t dfn);
 uint64_t amd_iommu_get_address_from_pte(void *entry);
 int __must_check amd_iommu_alloc_root(struct domain_iommu *hd);
 int amd_iommu_reserve_domain_unity_map(struct domain *domain,
                                        paddr_t phys_addr,
                                        unsigned long size, int iw, int ir);
-int __must_check amd_iommu_flush_iotlb_pages(struct domain *d, dfn_t dfn,
-                                             unsigned int page_count);
+int __must_check amd_iommu_flush_iotlb_pages(
+    struct domain *d, dfn_t dfn, unsigned int page_count,
+    enum iommu_flush_type flush_type);
 int __must_check amd_iommu_flush_iotlb_all(struct domain *d);
 
 /* Share p2m table with iommu */
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index da8294bac8..289e0e2772 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -155,6 +155,13 @@ struct page_info;
  */
 typedef int iommu_grdm_t(xen_pfn_t start, xen_ulong_t nr, u32 id, void *ctxt);
 
+enum iommu_flush_type
+{
+    IOMMU_FLUSH_none, /* no flush required */
+    IOMMU_FLUSH_added, /* no modified entries, just additional entries */
+    IOMMU_FLUSH_modified, /* modified entries */
+};
+
 struct iommu_ops {
     int (*init)(struct domain *d);
     void (*hwdom_init)(struct domain *d);
@@ -177,7 +184,8 @@ struct iommu_ops {
      * other by the caller in order to have meaningful results.
      */
     int __must_check (*map_page)(struct domain *d, dfn_t dfn, mfn_t mfn,
-                                 unsigned int flags);
+                                 unsigned int flags,
+                                 enum iommu_flush_type *flush_type);
     int __must_check (*unmap_page)(struct domain *d, dfn_t dfn);
     int __must_check (*lookup_page)(struct domain *d, dfn_t dfn, mfn_t *mfn,
                                     unsigned int *flags);
@@ -193,7 +201,8 @@ struct iommu_ops {
     void (*share_p2m)(struct domain *d);
     void (*crash_shutdown)(void);
     int __must_check (*iotlb_flush)(struct domain *d, dfn_t dfn,
-                                    unsigned int page_count);
+                                    unsigned int page_count,
+                                    enum iommu_flush_type flush_type);
     int __must_check (*iotlb_flush_all)(struct domain *d);
     int (*get_reserved_device_memory)(iommu_grdm_t *, void *);
     void (*dump_p2m_table)(struct domain *d);
-- 
2.11.0