vfio/iommufd: use iova_to_phys_length for efficient unmap

Guanghui Feng Tue, 02 Jun 2026 03:47:21 -0700

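Use iommu_iova_to_phys_length() to obtain the physical address and the
PTE page size in a single page table walk. This lets batch_from_domain(),
raw_pages_from_domain() and vfio_unmap_unpin() advance by the actual
mapping granularity instead of walking the page tables once per PAGE_SIZE
step.

As part of this change, vfio_unmap_unpin() now treats PHYS_ADDR_MAX,
rather than 0, as the lookup-failure value, and the iommufd selftest calls
iommu_iova_to_phys() instead of invoking the domain's iova_to_phys op
directly.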


Signed-off-by: Guanghui Feng <[email protected]>
Acked-by: Shiqiang Zhang <[email protected]>
Acked-by: Simon Guo <[email protected]>
---
 drivers/iommu/iommufd/pages.c    | 75 +++++++++++++++++++++++++++-----
 drivers/iommu/iommufd/selftest.c |  2 +-
 drivers/vfio/vfio_iommu_type1.c  | 26 ++++++++---
 3 files changed, 85 insertions(+), 18 deletions(-)

diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c
index 9bdb2945afe1..aed05bd0b01c 100644
--- a/drivers/iommu/iommufd/pages.c
+++ b/drivers/iommu/iommufd/pages.c
@@ -417,17 +417,44 @@ static void batch_from_domain(struct pfn_batch *batch,
        if (start_index == iopt_area_index(area))
                page_offset = area->page_offset;
        while (start_index <= last_index) {
+               size_t pgsize;
+               unsigned long npages;
+               unsigned long i;
+
                /*
-                * This is pretty slow, it would be nice to get the page size
-                * back from the driver, or have the driver directly fill the
-                * batch.
+                * Use iova_to_phys_length to get both the physical address
+                * and the PTE page size in a single page table walk, allowing
+                * us to skip ahead by the contiguous region size instead of
+                * walking the page tables for every PAGE_SIZE step.
                 */
-               phys = iommu_iova_to_phys(domain, iova) - page_offset;
-               if (!batch_add_pfn(batch, PHYS_PFN(phys)))
-                       return;
-               iova += PAGE_SIZE - page_offset;
+               phys = iommu_iova_to_phys_length(domain, iova, &pgsize);
+               if (WARN_ON(phys == PHYS_ADDR_MAX))
+                       break;
+               phys -= page_offset;
+               if (WARN_ON(!pgsize || pgsize < PAGE_SIZE))
+                       pgsize = PAGE_SIZE;
+
+               /*
+                * Calculate contiguous pages within this PTE from our
+                * position. phys points to the page-aligned start (backed
+                * up by page_offset), so pages available = bytes from phys
+                * to PTE end divided by PAGE_SIZE.
+                */
+               npages = (pgsize - (iova & (pgsize - 1)) + page_offset) /
+                        PAGE_SIZE;
+               npages = min_t(unsigned long, npages,
+                              last_index - start_index + 1);
+               if (!npages)
+                       npages = 1;
+
+               for (i = 0; i < npages; i++) {
+                       if (!batch_add_pfn(batch, PHYS_PFN(phys) + i))
+                               return;
+               }
+
+               iova += npages * PAGE_SIZE - page_offset;
                page_offset = 0;
-               start_index++;
+               start_index += npages;
        }
 }
 
@@ -445,11 +472,35 @@ static struct page **raw_pages_from_domain(struct 
iommu_domain *domain,
        if (start_index == iopt_area_index(area))
                page_offset = area->page_offset;
        while (start_index <= last_index) {
-               phys = iommu_iova_to_phys(domain, iova) - page_offset;
-               *(out_pages++) = pfn_to_page(PHYS_PFN(phys));
-               iova += PAGE_SIZE - page_offset;
+               size_t pgsize;
+               unsigned long npages;
+               unsigned long i;
+
+               /*
+                * Resolve the PTE page size together with the physical
+                * address so we can fill multiple struct page pointers per
+                * page table walk when the IOMMU uses large pages.
+                */
+               phys = iommu_iova_to_phys_length(domain, iova, &pgsize);
+               if (WARN_ON(phys == PHYS_ADDR_MAX))
+                       break;
+               phys -= page_offset;
+               if (WARN_ON(!pgsize || pgsize < PAGE_SIZE))
+                       pgsize = PAGE_SIZE;
+
+               npages = (pgsize - (iova & (pgsize - 1)) + page_offset) /
+                        PAGE_SIZE;
+               npages = min_t(unsigned long, npages,
+                              last_index - start_index + 1);
+               if (!npages)
+                       npages = 1;
+
+               for (i = 0; i < npages; i++)
+                       *(out_pages++) = pfn_to_page(PHYS_PFN(phys) + i);
+
+               iova += npages * PAGE_SIZE - page_offset;
                page_offset = 0;
-               start_index++;
+               start_index += npages;
        }
        return out_pages;
 }
diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c
index af07c642a526..4b9c3ffc9523 100644
--- a/drivers/iommu/iommufd/selftest.c
+++ b/drivers/iommu/iommufd/selftest.c
@@ -1214,7 +1214,7 @@ static int iommufd_test_md_check_pa(struct iommufd_ucmd 
*ucmd,
                pfn = page_to_pfn(pages[0]);
                put_page(pages[0]);
 
-               io_phys = mock->domain.ops->iova_to_phys(&mock->domain, iova);
+               io_phys = iommu_iova_to_phys(&mock->domain, iova);
                if (io_phys !=
                    pfn * PAGE_SIZE + ((uintptr_t)uptr % PAGE_SIZE)) {
                        rc = -EINVAL;
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index c8151ba54de3..c86315b1fcda 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -1177,25 +1177,41 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, 
struct vfio_dma *dma,
 
        iommu_iotlb_gather_init(&iotlb_gather);
        while (pos < dma->size) {
-               size_t unmapped, len;
+               size_t unmapped, len, pgsize;
                phys_addr_t phys, next;
                dma_addr_t iova = dma->iova + pos;
 
-               phys = iommu_iova_to_phys(domain->domain, iova);
-               if (WARN_ON(!phys)) {
+               /* Single page table walk returns both phys and PTE size */
+               phys = iommu_iova_to_phys_length(domain->domain, iova,
+                                                 &pgsize);
+               if (WARN_ON(phys == PHYS_ADDR_MAX)) {
                        pos += PAGE_SIZE;
                        continue;
                }
+               if (WARN_ON(!pgsize || pgsize < PAGE_SIZE))
+                       pgsize = PAGE_SIZE;
 
                /*
                 * To optimize for fewer iommu_unmap() calls, each of which
                 * may require hardware cache flushing, try to find the
                 * largest contiguous physical memory chunk to unmap.
+                *
+                * Calculate remaining contiguous bytes within this PTE from
+                * our position, then try to join following physically
+                * contiguous PTEs.
                 */
-               for (len = PAGE_SIZE; pos + len < dma->size; len += PAGE_SIZE) {
-                       next = iommu_iova_to_phys(domain->domain, iova + len);
+               len = pgsize - (iova & (pgsize - 1));
+               for (; pos + len < dma->size; ) {
+                       size_t next_pgsize;
+
+                       next = iommu_iova_to_phys_length(domain->domain,
+                                                         iova + len,
+                                                         &next_pgsize);
                        if (next != phys + len)
                                break;
+                       if (WARN_ON(!next_pgsize || next_pgsize < PAGE_SIZE))
+                               next_pgsize = PAGE_SIZE;
+                       len += next_pgsize;
                }
 
                /*
-- 
2.43.7

[PATCH v2 23/30] vfio/iommufd: use iova_to_phys_length for efficient unmap

Reply via email to