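Use iommu_iova_to_phys_length() to obtain the PTE page size together with the physical address in a single lookup, so that the iommufd and VFIO type1 paths can traverse a mapping by its actual granularity instead of in PAGE_SIZE steps.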
Signed-off-by: Guanghui Feng <[email protected]> Acked-by: Shiqiang Zhang <[email protected]> Acked-by: Simon Guo <[email protected]> --- drivers/iommu/iommufd/pages.c | 75 +++++++++++++++++++++++++++----- drivers/iommu/iommufd/selftest.c | 2 +- drivers/vfio/vfio_iommu_type1.c | 26 ++++++++--- 3 files changed, 85 insertions(+), 18 deletions(-) diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index 9bdb2945afe1..aed05bd0b01c 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -417,17 +417,44 @@ static void batch_from_domain(struct pfn_batch *batch, if (start_index == iopt_area_index(area)) page_offset = area->page_offset; while (start_index <= last_index) { + size_t pgsize; + unsigned long npages; + unsigned long i; + /* - * This is pretty slow, it would be nice to get the page size - * back from the driver, or have the driver directly fill the - * batch. + * Use iova_to_phys_length to get both the physical address + * and the PTE page size in a single page table walk, allowing + * us to skip ahead by the contiguous region size instead of + * walking the page tables for every PAGE_SIZE step. */ - phys = iommu_iova_to_phys(domain, iova) - page_offset; - if (!batch_add_pfn(batch, PHYS_PFN(phys))) - return; - iova += PAGE_SIZE - page_offset; + phys = iommu_iova_to_phys_length(domain, iova, &pgsize); + if (WARN_ON(phys == PHYS_ADDR_MAX)) + break; + phys -= page_offset; + if (WARN_ON(!pgsize || pgsize < PAGE_SIZE)) + pgsize = PAGE_SIZE; + + /* + * Calculate contiguous pages within this PTE from our + * position. phys points to the page-aligned start (backed + * up by page_offset), so pages available = bytes from phys + * to PTE end divided by PAGE_SIZE. 
+ */ + npages = (pgsize - (iova & (pgsize - 1)) + page_offset) / + PAGE_SIZE; + npages = min_t(unsigned long, npages, + last_index - start_index + 1); + if (!npages) + npages = 1; + + for (i = 0; i < npages; i++) { + if (!batch_add_pfn(batch, PHYS_PFN(phys) + i)) + return; + } + + iova += npages * PAGE_SIZE - page_offset; page_offset = 0; - start_index++; + start_index += npages; } } @@ -445,11 +472,35 @@ static struct page **raw_pages_from_domain(struct iommu_domain *domain, if (start_index == iopt_area_index(area)) page_offset = area->page_offset; while (start_index <= last_index) { - phys = iommu_iova_to_phys(domain, iova) - page_offset; - *(out_pages++) = pfn_to_page(PHYS_PFN(phys)); - iova += PAGE_SIZE - page_offset; + size_t pgsize; + unsigned long npages; + unsigned long i; + + /* + * Resolve the PTE page size together with the physical + * address so we can fill multiple struct page pointers per + * page table walk when the IOMMU uses large pages. + */ + phys = iommu_iova_to_phys_length(domain, iova, &pgsize); + if (WARN_ON(phys == PHYS_ADDR_MAX)) + break; + phys -= page_offset; + if (WARN_ON(!pgsize || pgsize < PAGE_SIZE)) + pgsize = PAGE_SIZE; + + npages = (pgsize - (iova & (pgsize - 1)) + page_offset) / + PAGE_SIZE; + npages = min_t(unsigned long, npages, + last_index - start_index + 1); + if (!npages) + npages = 1; + + for (i = 0; i < npages; i++) + *(out_pages++) = pfn_to_page(PHYS_PFN(phys) + i); + + iova += npages * PAGE_SIZE - page_offset; page_offset = 0; - start_index++; + start_index += npages; } return out_pages; } diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index af07c642a526..4b9c3ffc9523 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -1214,7 +1214,7 @@ static int iommufd_test_md_check_pa(struct iommufd_ucmd *ucmd, pfn = page_to_pfn(pages[0]); put_page(pages[0]); - io_phys = mock->domain.ops->iova_to_phys(&mock->domain, iova); + io_phys = 
iommu_iova_to_phys(&mock->domain, iova); if (io_phys != pfn * PAGE_SIZE + ((uintptr_t)uptr % PAGE_SIZE)) { rc = -EINVAL; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index c8151ba54de3..c86315b1fcda 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1177,25 +1177,41 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, iommu_iotlb_gather_init(&iotlb_gather); while (pos < dma->size) { - size_t unmapped, len; + size_t unmapped, len, pgsize; phys_addr_t phys, next; dma_addr_t iova = dma->iova + pos; - phys = iommu_iova_to_phys(domain->domain, iova); - if (WARN_ON(!phys)) { + /* Single page table walk returns both phys and PTE size */ + phys = iommu_iova_to_phys_length(domain->domain, iova, + &pgsize); + if (WARN_ON(phys == PHYS_ADDR_MAX)) { pos += PAGE_SIZE; continue; } + if (WARN_ON(!pgsize || pgsize < PAGE_SIZE)) + pgsize = PAGE_SIZE; /* * To optimize for fewer iommu_unmap() calls, each of which * may require hardware cache flushing, try to find the * largest contiguous physical memory chunk to unmap. + * + * Calculate remaining contiguous bytes within this PTE from + * our position, then try to join following physically + * contiguous PTEs. */ - for (len = PAGE_SIZE; pos + len < dma->size; len += PAGE_SIZE) { - next = iommu_iova_to_phys(domain->domain, iova + len); + len = pgsize - (iova & (pgsize - 1)); + for (; pos + len < dma->size; ) { + size_t next_pgsize; + + next = iommu_iova_to_phys_length(domain->domain, + iova + len, + &next_pgsize); if (next != phys + len) break; + if (WARN_ON(!next_pgsize || next_pgsize < PAGE_SIZE)) + next_pgsize = PAGE_SIZE; + len += next_pgsize; } /* -- 2.43.7
