On Sat, 16 May 2020 14:21:01 +0800 Lu Baolu <[email protected]> wrote:
> From: Tom Murphy <[email protected]> > > There's no need for the non-dma_ops path to keep track of IOVAs. The > whole point of the non-dma_ops path is that it allows the IOVAs to be > handled separately. The IOVA handling code removed in this patch is > pointless. > > Signed-off-by: Tom Murphy <[email protected]> > Signed-off-by: Lu Baolu <[email protected]> > --- > drivers/iommu/intel-iommu.c | 95 +++++++++++++------------------------ > 1 file changed, 32 insertions(+), 63 deletions(-) This commit results in a massive increase in memory use from the VT-d code. I have a 16GB system where I reserve 7168 2MB hugepages for VM usage (14GB), leaving the host with 2GB. I can no longer even boot the host in this configuration. Bisecting to this commit, I find that reverting this change shows the following memory usage difference immediately after boot (no hugepages, nosmp, single user, intel_iommu=on iommu=pt): @e70b081c6f37: total used free shared buff/cache available Mem: 16090860 2219372 13673044 1040 198444 13602664 Swap: 2097148 0 2097148 reverting e70b081c6f37: total used free shared buff/cache available Mem: 16090852 101648 15789156 1040 200048 15719572 Swap: 2097148 0 2097148 More than 2GB of additional memory used! There's also a notable stall during bootup for this allocation: [ 9.703360] DMAR: No ATSR found [ 9.709768] DMAR: dmar0: Using Queued invalidation [ 9.719370] DMAR: dmar1: Using Queued invalidation ### 4+ seconds! 
### [ 14.076387] pci 0000:00:00.0: Adding to iommu group 0 [ 14.086515] pci 0000:00:01.0: Adding to iommu group 1 [ 14.096635] pci 0000:00:02.0: Adding to iommu group 2 Thanks, Alex > diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c > index 3c5cc3424e90..f75d7d9c231f 100644 > --- a/drivers/iommu/intel-iommu.c > +++ b/drivers/iommu/intel-iommu.c > @@ -1892,11 +1892,6 @@ static int dmar_init_reserved_ranges(void) > return 0; > } > > -static void domain_reserve_special_ranges(struct dmar_domain *domain) > -{ > - copy_reserved_iova(&reserved_iova_list, &domain->iovad); > -} > - > static inline int guestwidth_to_adjustwidth(int gaw) > { > int agaw; > @@ -1918,7 +1913,8 @@ static void domain_exit(struct dmar_domain *domain) > domain_remove_dev_info(domain); > > /* destroy iovas */ > - put_iova_domain(&domain->iovad); > + if (domain->domain.type == IOMMU_DOMAIN_DMA) > + put_iova_domain(&domain->iovad); > > if (domain->pgd) { > struct page *freelist; > @@ -2627,19 +2623,9 @@ static struct dmar_domain > *dmar_insert_one_dev_info(struct intel_iommu *iommu, > } > > static int iommu_domain_identity_map(struct dmar_domain *domain, > - unsigned long long start, > - unsigned long long end) > + unsigned long first_vpfn, > + unsigned long last_vpfn) > { > - unsigned long first_vpfn = start >> VTD_PAGE_SHIFT; > - unsigned long last_vpfn = end >> VTD_PAGE_SHIFT; > - > - if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn), > - dma_to_mm_pfn(last_vpfn))) { > - pr_err("Reserving iova failed\n"); > - return -ENOMEM; > - } > - > - pr_debug("Mapping reserved region %llx-%llx\n", start, end); > /* > * RMRR range might have overlap with physical memory range, > * clear it first > @@ -2677,7 +2663,8 @@ static int __init si_domain_init(int hw) > > for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) { > ret = iommu_domain_identity_map(si_domain, > - PFN_PHYS(start_pfn), PFN_PHYS(end_pfn)); > + mm_to_dma_pfn(start_pfn), > + mm_to_dma_pfn(end_pfn)); > if (ret) 
> return ret; > } > @@ -4547,58 +4534,37 @@ static int intel_iommu_memory_notifier(struct > notifier_block *nb, > unsigned long val, void *v) > { > struct memory_notify *mhp = v; > - unsigned long long start, end; > - unsigned long start_vpfn, last_vpfn; > + unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn); > + unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn + > + mhp->nr_pages - 1); > > switch (val) { > case MEM_GOING_ONLINE: > - start = mhp->start_pfn << PAGE_SHIFT; > - end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1; > - if (iommu_domain_identity_map(si_domain, start, end)) { > - pr_warn("Failed to build identity map for > [%llx-%llx]\n", > - start, end); > + if (iommu_domain_identity_map(si_domain, > + start_vpfn, last_vpfn)) { > + pr_warn("Failed to build identity map for [%lx-%lx]\n", > + start_vpfn, last_vpfn); > return NOTIFY_BAD; > } > break; > > case MEM_OFFLINE: > case MEM_CANCEL_ONLINE: > - start_vpfn = mm_to_dma_pfn(mhp->start_pfn); > - last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1); > - while (start_vpfn <= last_vpfn) { > - struct iova *iova; > + { > struct dmar_drhd_unit *drhd; > struct intel_iommu *iommu; > struct page *freelist; > > - iova = find_iova(&si_domain->iovad, start_vpfn); > - if (iova == NULL) { > - pr_debug("Failed get IOVA for PFN %lx\n", > - start_vpfn); > - break; > - } > - > - iova = split_and_remove_iova(&si_domain->iovad, iova, > - start_vpfn, last_vpfn); > - if (iova == NULL) { > - pr_warn("Failed to split IOVA PFN [%lx-%lx]\n", > - start_vpfn, last_vpfn); > - return NOTIFY_BAD; > - } > - > - freelist = domain_unmap(si_domain, iova->pfn_lo, > - iova->pfn_hi); > + freelist = domain_unmap(si_domain, > + start_vpfn, last_vpfn); > > rcu_read_lock(); > for_each_active_iommu(iommu, drhd) > iommu_flush_iotlb_psi(iommu, si_domain, > - iova->pfn_lo, iova_size(iova), > + start_vpfn, mhp->nr_pages, > !freelist, 0); > rcu_read_unlock(); > dma_free_pagelist(freelist); > - > - start_vpfn = iova->pfn_hi 
+ 1; > - free_iova_mem(iova); > } > break; > } > @@ -4626,8 +4592,9 @@ static void free_all_cpu_cached_iovas(unsigned int cpu) > for (did = 0; did < cap_ndoms(iommu->cap); did++) { > domain = get_iommu_domain(iommu, (u16)did); > > - if (!domain) > + if (!domain || domain->domain.type != IOMMU_DOMAIN_DMA) > continue; > + > free_cpu_cached_iovas(cpu, &domain->iovad); > } > } > @@ -5037,9 +5004,6 @@ static int md_domain_init(struct dmar_domain *domain, > int guest_width) > { > int adjust_width; > > - init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); > - domain_reserve_special_ranges(domain); > - > /* calculate AGAW */ > domain->gaw = guest_width; > adjust_width = guestwidth_to_adjustwidth(guest_width); > @@ -5058,11 +5022,21 @@ static int md_domain_init(struct dmar_domain *domain, > int guest_width) > return 0; > } > > +static void intel_init_iova_domain(struct dmar_domain *dmar_domain) > +{ > + init_iova_domain(&dmar_domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); > + copy_reserved_iova(&reserved_iova_list, &dmar_domain->iovad); > + > + if (!intel_iommu_strict && > + init_iova_flush_queue(&dmar_domain->iovad, > + iommu_flush_iova, iova_entry_free)) > + pr_info("iova flush queue initialization failed\n"); > +} > + > static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) > { > struct dmar_domain *dmar_domain; > struct iommu_domain *domain; > - int ret; > > switch (type) { > case IOMMU_DOMAIN_DMA: > @@ -5079,13 +5053,8 @@ static struct iommu_domain > *intel_iommu_domain_alloc(unsigned type) > return NULL; > } > > - if (!intel_iommu_strict && type == IOMMU_DOMAIN_DMA) { > - ret = init_iova_flush_queue(&dmar_domain->iovad, > - iommu_flush_iova, > - iova_entry_free); > - if (ret) > - pr_info("iova flush queue initialization > failed\n"); > - } > + if (type == IOMMU_DOMAIN_DMA) > + intel_init_iova_domain(dmar_domain); > > domain_update_iommu_cap(dmar_domain); > _______________________________________________ iommu mailing list [email protected] 
https://lists.linuxfoundation.org/mailman/listinfo/iommu
